diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -68,6 +68,9 @@
   bool isSupportedStackID(TargetStackID::Value ID) const override;
   TargetStackID::Value getStackIDForScalableVectors() const override;
 
+  void processFunctionBeforeFrameIndicesReplaced(
+      MachineFunction &MF, RegScavenger *RS = nullptr) const override;
+
 protected:
   const RISCVSubtarget &STI;
 
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -480,6 +480,9 @@
     }
   }
 
+  if (RVVStackSize)
+    adjustStackForRVV(MF, MBB, MBBI, DL, -RVVStackSize);
+
   if (hasFP(MF)) {
     // Realign Stack
     const RISCVRegisterInfo *RI = STI.getRegisterInfo();
@@ -511,9 +514,6 @@
       }
     }
   }
-
-  if (RVVStackSize)
-    adjustStackForRVV(MF, MBB, MBBI, DL, -RVVStackSize);
 }
 
 void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -645,35 +645,40 @@
     if (hasBP(MF)) {
       FrameReg = RISCVABI::getBPReg();
       // |--------------------------| -- <-- FP
-      // | callee-saved registers   | |
-      // |--------------------------| | MFI.getStackSize()
-      // | scalar local variables   | |
-      // |--------------------------| --
-      // | Realignment              | |
+      // | callee-saved registers   | | <---------.
+      // |--------------------------| --          |
+      // | realignment (the size of | |           |
+      // | this area is not counted | |           |
+      // | in MFI.getStackSize())   | |           |
+      // |--------------------------| -- |-- MFI.getStackSize()
+      // | RVV objects              | |           |
+      // |--------------------------| --          |
+      // | scalar local variables   | | <---------'
       // |--------------------------| -- <-- BP
-      // | RVV objects              | | RVFI->getRVVStackSize()
-      // |--------------------------| --
       // | VarSize objects          | |
       // |--------------------------| -- <-- SP
     } else {
       FrameReg = RISCV::X2;
-      // When using SP to access frame objects, we need to add RVV stack size.
-      //
       // |--------------------------| -- <-- FP
-      // | callee-saved registers   | |
-      // |--------------------------| | MFI.getStackSize()
-      // | scalar local variables   | |
-      // |--------------------------| --
-      // | Realignment              | |
-      // |--------------------------| --
-      // | RVV objects              | | RVFI->getRVVStackSize()
+      // | callee-saved registers   | | <---------.
+      // |--------------------------| --          |
+      // | realignment (the size of | |           |
+      // | this area is not counted | |           |
+      // | in MFI.getStackSize())   | |           |
+      // |--------------------------| -- |-- MFI.getStackSize()
+      // | RVV objects              | |           |
+      // |--------------------------| --          |
+      // | scalar local variables   | | <---------'
       // |--------------------------| -- <-- SP
-      Offset += StackOffset::getScalable(RVFI->getRVVStackSize());
     }
 
     if (MFI.getStackID(FI) == TargetStackID::Default) {
       Offset += StackOffset::getFixed(MFI.getStackSize());
       if (FI < 0)
         Offset += StackOffset::getFixed(RVFI->getLibCallStackSize());
+    } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+      Offset +=
+          StackOffset::get(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(),
+                           RVFI->getRVVStackSize());
     }
   } else {
     FrameReg = RI->getFrameRegister(MF);
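Note on the two diagrams above: because the RVV area now sits between the callee-saved registers and the scalar locals, a scalable frame index resolves to a fixed byte part plus a scalable (vlenb-multiple) part. Below is a minimal sketch of the arithmetic the new TargetStackID::ScalableVector branch performs, using StackOffset (LLVM's fixed+scalable offset pair from llvm/Support/TypeSize.h); it assumes only what the hunk shows, and the numbers in the trailing comment are invented for illustration, not taken from this patch:

    // Mirrors the ScalableVector case added above. ObjOffset is the object's
    // own offset: scalable, negative, measured from the top of the RVV area.
    StackOffset resolveScalableFI(StackOffset ObjOffset, int64_t StackSize,
                                  int64_t CalleeSavedSize,
                                  int64_t RVVStackSize) {
      StackOffset Off = ObjOffset;
      // The fixed part steps over the scalar locals; the scalable part steps
      // over the whole RVV area.
      Off += StackOffset::get(StackSize - CalleeSavedSize, RVVStackSize);
      return Off;
    }
    // Example: StackSize = 32, CalleeSavedSize = 16, RVVStackSize = 2 * vlenb
    // and ObjOffset = -1 * vlenb give sp + 16 + 1 * vlenb, emitted as the
    //   csrr a0, vlenb ; add a0, sp, a0 ; addi a0, a0, 16
    // sequence seen throughout the updated tests.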
@@ -699,17 +704,20 @@
     // When using SP to access frame objects, we need to add RVV stack size.
     //
     // |--------------------------| -- <-- FP
-    // | callee-saved registers   | |
-    // |--------------------------| | MFI.getStackSize()
-    // | scalar local variables   | |
-    // |--------------------------| --
-    // | RVV objects              | | RVFI->getRVVStackSize()
+    // | callee-saved registers   | |<--------.
+    // |--------------------------| --        |
+    // | RVV objects              | |         |-- MFI.getStackSize()
+    // |--------------------------| --        |
+    // | scalar local variables   | |<--------'
     // |--------------------------| -- <-- SP
-    Offset += StackOffset::getScalable(RVFI->getRVVStackSize());
     }
 
     if (MFI.getStackID(FI) == TargetStackID::Default) {
       Offset += StackOffset::getFixed(MFI.getStackSize());
       if (FI < 0)
         Offset += StackOffset::getFixed(RVFI->getLibCallStackSize());
+    } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+      Offset += StackOffset::get(MFI.getStackSize() -
+                                     RVFI->getCalleeSavedStackSize(),
+                                 RVFI->getRVVStackSize());
     }
   }
 }
@@ -798,21 +806,48 @@
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetRegisterClass *RC = &RISCV::GPRRegClass;
+  auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+
+  int64_t RVVStackSize = assignRVVStackObjectOffsets(MFI);
+  RVFI->setRVVStackSize(RVVStackSize);
+
   // estimateStackSize has been observed to under-estimate the final stack
   // size, so give ourselves wiggle-room by checking for stack size
   // representable in an 11-bit signed field rather than 12-bits.
   // FIXME: It may be possible to craft a function with a small stack that
   // still needs an emergency spill slot for branch relaxation. This case
   // would currently be missed.
-  if (!isInt<11>(MFI.estimateStackSize(MF))) {
+  if (!isInt<11>(MFI.estimateStackSize(MF)) || RVVStackSize != 0) {
     int RegScavFI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC),
                                           RegInfo->getSpillAlign(*RC), false);
     RS->addScavengingFrameIndex(RegScavFI);
   }
+}
 
+void RISCVFrameLowering::processFunctionBeforeFrameIndicesReplaced(
+    MachineFunction &MF, RegScavenger *RS) const {
   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
-  int64_t RVVStackSize = assignRVVStackObjectOffsets(MFI);
-  RVFI->setRVVStackSize(RVVStackSize);
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF)) {
+    RVFI->setCalleeSavedStackSize(0);
+    return;
+  }
+
+  int64_t MinOffset = std::numeric_limits<int64_t>::max();
+  int64_t MaxOffset = std::numeric_limits<int64_t>::min();
+  for (const auto &Info : MFI.getCalleeSavedInfo()) {
+    int FrameIdx = Info.getFrameIdx();
+    if (MFI.getStackID(FrameIdx) != TargetStackID::Default)
+      continue;
+
+    int64_t Offset = MFI.getObjectOffset(FrameIdx);
+    int64_t ObjSize = MFI.getObjectSize(FrameIdx);
+    MinOffset = std::min(Offset, MinOffset);
+    MaxOffset = std::max(Offset + ObjSize, MaxOffset);
+  }
+
+  unsigned Size = alignTo(MaxOffset - MinOffset, 16);
+  RVFI->setCalleeSavedStackSize(Size);
 }
 
 // Not preserve stack space within prologue for outgoing variables when the
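Worked example for the callee-saved-size computation above (the register set is hypothetical, not taken from any particular test): a function spilling ra and s0 gets two 8-byte slots at offsets -8 and -16, so

    MinOffset = -16
    MaxOffset = max(-8 + 8, -16 + 8) = 0
    Size      = alignTo(0 - (-16), 16) = 16

The hook runs after frame offsets are final but before frame indices are rewritten, which is what lets getFrameIndexReference above rely on getCalleeSavedStackSize(). Frames using the save/restore libcalls are excluded because their callee-saved spills are not laid out through these offsets.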
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -34,6 +34,8 @@
   unsigned LibCallStackSize = 0;
   /// Size of RVV stack.
   uint64_t RVVStackSize = 0;
+  /// Size of the stack frame used to save callee-saved registers.
+  unsigned CalleeSavedStackSize = 0;
 
 public:
   RISCVMachineFunctionInfo(const MachineFunction &MF) {}
@@ -63,6 +65,9 @@
   uint64_t getRVVStackSize() const { return RVVStackSize; }
   void setRVVStackSize(uint64_t Size) { RVVStackSize = Size; }
+
+  unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
+  void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
@@ -34,14 +34,11 @@
 ; RV64IV-NEXT:    .cfi_def_cfa_offset 528
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    sub sp, sp, a0
-; RV64IV-NEXT:    csrr a0, vlenb
-; RV64IV-NEXT:    add a0, sp, a0
-; RV64IV-NEXT:    addi a0, a0, 8
+; RV64IV-NEXT:    addi a0, sp, 8
 ; RV64IV-NEXT:    vl1re64.v v25, (a0)
-; RV64IV-NEXT:    csrr a0, vlenb
-; RV64IV-NEXT:    add a0, sp, a0
-; RV64IV-NEXT:    ld a0, 520(a0)
-; RV64IV-NEXT:    vl1re64.v v26, (sp)
+; RV64IV-NEXT:    ld a0, 520(sp)
+; RV64IV-NEXT:    addi a1, sp, 528
+; RV64IV-NEXT:    vl1re64.v v26, (a1)
 ; RV64IV-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
 ; RV64IV-NEXT:    vadd.vv v8, v25, v26
 ; RV64IV-NEXT:    csrr a0, vlenb
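The access-fixed-objects-by-rvv.ll delta above shows the payoff of the reordering. The frame here is 528 fixed bytes plus one vlenb-sized RVV object; with the RVV area no longer sitting between SP and the scalar objects, fixed-offset accesses collapse to single instructions:

    old: csrr a0, vlenb ; add a0, sp, a0 ; ld a0, 520(a0)   (address sp + vlenb + 520)
    new: ld a0, 520(sp)                                     (address sp + 520)

The RVV object at the bottom of the RVV area needs no vlenb read either, since its scalable component is 0 * vlenb (the new addi a0, sp, 8 line).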
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -5,10 +5,12 @@
 define void @lmul1() nounwind {
 ; CHECK-LABEL: lmul1:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = alloca
   ret void
@@ -17,12 +19,14 @@
 define void @lmul2() nounwind {
 ; CHECK-LABEL: lmul2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v = alloca
   ret void
@@ -35,10 +39,10 @@
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 32
-; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 2
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
@@ -55,10 +59,10 @@
 ; CHECK-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 64
-; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    addi sp, s0, -64
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
@@ -71,6 +75,7 @@
 define void @lmul1_and_2() nounwind {
 ; CHECK-LABEL: lmul1_and_2:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 3
 ; CHECK-NEXT:    mul a0, a0, a1
@@ -79,6 +84,7 @@
 ; CHECK-NEXT:    addi a1, zero, 3
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v1 = alloca
   %v2 = alloca
@@ -92,11 +98,11 @@
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 32
-; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 6
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
@@ -114,11 +120,11 @@
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 32
-; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 5
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
@@ -132,6 +138,7 @@
 define void @lmul2_and_1() nounwind {
 ; CHECK-LABEL: lmul2_and_1:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 3
 ; CHECK-NEXT:    mul a0, a0, a1
@@ -140,6 +147,7 @@
 ; CHECK-NEXT:    addi a1, zero, 3
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v1 = alloca
   %v2 = alloca
@@ -150,17 +158,17 @@
 ; CHECK-LABEL: lmul4_and_1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -32
-; CHECK-NEXT:    sd ra, 24(sp)  # 8-byte Folded Spill
-; CHECK-NEXT:    sd s0, 16(sp)  # 8-byte Folded Spill
+; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 32
-; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 5
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
-; CHECK-NEXT:    ld s0, 16(sp)  # 8-byte Folded Reload
-; CHECK-NEXT:    ld ra, 24(sp)  # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %v1 = alloca
@@ -172,17 +180,17 @@
 ; CHECK-LABEL: lmul4_and_2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -32
-; CHECK-NEXT:    sd ra, 24(sp)  # 8-byte Folded Spill
-; CHECK-NEXT:    sd s0, 16(sp)  # 8-byte Folded Spill
+; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 32
-; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 6
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
-; CHECK-NEXT:    ld s0, 16(sp)  # 8-byte Folded Reload
-; CHECK-NEXT:    ld ra, 24(sp)  # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %v1 = alloca
@@ -197,11 +205,11 @@
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 32
-; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 12
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
@@ -221,11 +229,11 @@
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 32
-; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 12
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
@@ -248,11 +256,7 @@
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    addi a0, zero, 3
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    addi a2, zero, 3
-; CHECK-NEXT:    mul a1, a1, a2
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    sd a0, 8(a1)
+; CHECK-NEXT:    sd a0, 8(sp)
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 3
 ; CHECK-NEXT:    mul a0, a0, a1
@@ -273,17 +277,13 @@
 ; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 32
-; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 5
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi a0, zero, 3
-; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    addi a2, zero, 5
-; CHECK-NEXT:    mul a1, a1, a2
-; CHECK-NEXT:    add a1, sp, a1
-; CHECK-NEXT:    sd a0, 8(a1)
+; CHECK-NEXT:    sd a0, 8(sp)
 ; CHECK-NEXT:    addi sp, s0, -32
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
@@ -303,11 +303,11 @@
 ; CHECK-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 64
-; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 15
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    addi sp, s0, -64
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
@@ -327,11 +327,11 @@
 ; CHECK-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 64
-; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 30
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    addi sp, s0, -64
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
@@ -355,11 +355,11 @@
 ; CHECK-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    addi s0, sp, 64
-; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    addi a1, zero, 30
 ; CHECK-NEXT:    mul a0, a0, a1
 ; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    addi sp, s0, -64
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
@@ -379,12 +379,14 @@
 define void @masks() nounwind {
 ; CHECK-LABEL: masks:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 2
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 2
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %v1 = alloca
   %v2 = alloca
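A pattern worth calling out in allocate-lmul-2-4-8.ll: functions whose frames used to be purely scalable (lmul1, lmul2, masks, ...) now also adjust SP by 16 scalar bytes. That follows from the `|| RVVStackSize != 0` change earlier in this patch: any function with RVV stack objects now reserves an emergency scavenging slot, and that slot lives in the (16-byte aligned) scalar region. As a worked example, at VLEN = 128 (vlenb = 16) lmul2's prologue now allocates

    16 + 2 * 16 = 48 bytes

released in reverse order in the epilogue. The realigning functions also show `andi sp, sp, -N` moving after the RVV allocation, matching the new diagrams, where the realignment padding sits above the RVV objects.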
diff --git a/llvm/test/CodeGen/RISCV/rvv/localvar.ll b/llvm/test/CodeGen/RISCV/rvv/localvar.ll
--- a/llvm/test/CodeGen/RISCV/rvv/localvar.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/localvar.ll
@@ -5,18 +5,22 @@
 define void @local_var_mf8() {
 ; RV64IV-LABEL: local_var_mf8:
 ; RV64IV:       # %bb.0:
-; RV64IV-NEXT:    .cfi_def_cfa_offset 0
+; RV64IV-NEXT:    addi sp, sp, -16
+; RV64IV-NEXT:    .cfi_def_cfa_offset 16
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 1
 ; RV64IV-NEXT:    sub sp, sp, a0
 ; RV64IV-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    add a0, sp, a0
+; RV64IV-NEXT:    addi a0, a0, 16
+; RV64IV-NEXT:    vle8.v v25, (a0)
+; RV64IV-NEXT:    addi a0, sp, 16
 ; RV64IV-NEXT:    vle8.v v25, (a0)
-; RV64IV-NEXT:    vle8.v v25, (sp)
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 1
 ; RV64IV-NEXT:    add sp, sp, a0
+; RV64IV-NEXT:    addi sp, sp, 16
 ; RV64IV-NEXT:    ret
   %local0 = alloca
   %local1 = alloca
@@ -28,17 +32,21 @@
 define void @local_var_m1() {
 ; RV64IV-LABEL: local_var_m1:
 ; RV64IV:       # %bb.0:
-; RV64IV-NEXT:    .cfi_def_cfa_offset 0
+; RV64IV-NEXT:    addi sp, sp, -16
+; RV64IV-NEXT:    .cfi_def_cfa_offset 16
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 1
 ; RV64IV-NEXT:    sub sp, sp, a0
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    add a0, sp, a0
+; RV64IV-NEXT:    addi a0, a0, 16
+; RV64IV-NEXT:    vl1r.v v25, (a0)
+; RV64IV-NEXT:    addi a0, sp, 16
 ; RV64IV-NEXT:    vl1r.v v25, (a0)
-; RV64IV-NEXT:    vl1r.v v25, (sp)
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 1
 ; RV64IV-NEXT:    add sp, sp, a0
+; RV64IV-NEXT:    addi sp, sp, 16
 ; RV64IV-NEXT:    ret
   %local0 = alloca
   %local1 = alloca
@@ -50,18 +58,22 @@
 define void @local_var_m2() {
 ; RV64IV-LABEL: local_var_m2:
 ; RV64IV:       # %bb.0:
-; RV64IV-NEXT:    .cfi_def_cfa_offset 0
+; RV64IV-NEXT:    addi sp, sp, -16
+; RV64IV-NEXT:    .cfi_def_cfa_offset 16
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 2
 ; RV64IV-NEXT:    sub sp, sp, a0
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 1
 ; RV64IV-NEXT:    add a0, sp, a0
+; RV64IV-NEXT:    addi a0, a0, 16
+; RV64IV-NEXT:    vl2r.v v26, (a0)
+; RV64IV-NEXT:    addi a0, sp, 16
 ; RV64IV-NEXT:    vl2r.v v26, (a0)
-; RV64IV-NEXT:    vl2r.v v26, (sp)
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 2
 ; RV64IV-NEXT:    add sp, sp, a0
+; RV64IV-NEXT:    addi sp, sp, 16
 ; RV64IV-NEXT:    ret
   %local0 = alloca
   %local1 = alloca
@@ -81,15 +93,17 @@
 ; RV64IV-NEXT:    .cfi_offset s0, -16
 ; RV64IV-NEXT:    addi s0, sp, 32
 ; RV64IV-NEXT:    .cfi_def_cfa s0, 0
-; RV64IV-NEXT:    andi sp, sp, -32
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 3
 ; RV64IV-NEXT:    sub sp, sp, a0
+; RV64IV-NEXT:    andi sp, sp, -32
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 2
 ; RV64IV-NEXT:    add a0, sp, a0
+; RV64IV-NEXT:    addi a0, a0, 16
+; RV64IV-NEXT:    vl4r.v v28, (a0)
+; RV64IV-NEXT:    addi a0, sp, 16
 ; RV64IV-NEXT:    vl4r.v v28, (a0)
-; RV64IV-NEXT:    vl4r.v v28, (sp)
 ; RV64IV-NEXT:    addi sp, s0, -32
 ; RV64IV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
@@ -113,15 +127,17 @@
 ; RV64IV-NEXT:    .cfi_offset s0, -16
 ; RV64IV-NEXT:    addi s0, sp, 64
 ; RV64IV-NEXT:    .cfi_def_cfa s0, 0
-; RV64IV-NEXT:    andi sp, sp, -64
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 4
 ; RV64IV-NEXT:    sub sp, sp, a0
+; RV64IV-NEXT:    andi sp, sp, -64
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 3
 ; RV64IV-NEXT:    add a0, sp, a0
+; RV64IV-NEXT:    addi a0, a0, 48
+; RV64IV-NEXT:    vl8r.v v8, (a0)
+; RV64IV-NEXT:    addi a0, sp, 48
 ; RV64IV-NEXT:    vl8r.v v8, (a0)
-; RV64IV-NEXT:    vl8r.v v8, (sp)
 ; RV64IV-NEXT:    addi sp, s0, -64
 ; RV64IV-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
@@ -142,19 +158,15 @@
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 2
 ; RV64IV-NEXT:    sub sp, sp, a0
-; RV64IV-NEXT:    csrr a0, vlenb
-; RV64IV-NEXT:    slli a0, a0, 2
-; RV64IV-NEXT:    add a0, sp, a0
-; RV64IV-NEXT:    lw a0, 12(a0)
+; RV64IV-NEXT:    lw a0, 12(sp)
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 1
 ; RV64IV-NEXT:    add a0, sp, a0
+; RV64IV-NEXT:    addi a0, a0, 16
 ; RV64IV-NEXT:    vl2r.v v26, (a0)
-; RV64IV-NEXT:    vl2r.v v26, (sp)
-; RV64IV-NEXT:    csrr a0, vlenb
-; RV64IV-NEXT:    slli a0, a0, 2
-; RV64IV-NEXT:    add a0, sp, a0
-; RV64IV-NEXT:    lw a0, 8(a0)
+; RV64IV-NEXT:    addi a0, sp, 16
+; RV64IV-NEXT:    vl2r.v v26, (a0)
+; RV64IV-NEXT:    lw a0, 8(sp)
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 2
 ; RV64IV-NEXT:    add sp, sp, a0
@@ -174,13 +186,13 @@
 define void @local_var_m2_with_varsize_object(i64 %n) {
 ; RV64IV-LABEL: local_var_m2_with_varsize_object:
 ; RV64IV:       # %bb.0:
-; RV64IV-NEXT:    addi sp, sp, -16
-; RV64IV-NEXT:    .cfi_def_cfa_offset 16
-; RV64IV-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IV-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64IV-NEXT:    addi sp, sp, -32
+; RV64IV-NEXT:    .cfi_def_cfa_offset 32
+; RV64IV-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64IV-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64IV-NEXT:    .cfi_offset ra, -8
 ; RV64IV-NEXT:    .cfi_offset s0, -16
-; RV64IV-NEXT:    addi s0, sp, 16
+; RV64IV-NEXT:    addi s0, sp, 32
 ; RV64IV-NEXT:    .cfi_def_cfa s0, 0
 ; RV64IV-NEXT:    csrr a1, vlenb
 ; RV64IV-NEXT:    slli a1, a1, 2
@@ -192,22 +204,22 @@
 ; RV64IV-NEXT:    csrr a1, vlenb
 ; RV64IV-NEXT:    slli a1, a1, 1
 ; RV64IV-NEXT:    sub a1, s0, a1
-; RV64IV-NEXT:    addi a1, a1, -16
+; RV64IV-NEXT:    addi a1, a1, -32
 ; RV64IV-NEXT:    call notdead@plt
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 1
 ; RV64IV-NEXT:    sub a0, s0, a0
-; RV64IV-NEXT:    addi a0, a0, -16
+; RV64IV-NEXT:    addi a0, a0, -32
 ; RV64IV-NEXT:    vl2r.v v26, (a0)
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 2
 ; RV64IV-NEXT:    sub a0, s0, a0
-; RV64IV-NEXT:    addi a0, a0, -16
+; RV64IV-NEXT:    addi a0, a0, -32
 ; RV64IV-NEXT:    vl2r.v v26, (a0)
-; RV64IV-NEXT:    addi sp, s0, -16
-; RV64IV-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
-; RV64IV-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IV-NEXT:    addi sp, sp, 16
+; RV64IV-NEXT:    addi sp, s0, -32
+; RV64IV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64IV-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IV-NEXT:    addi sp, sp, 32
 ; RV64IV-NEXT:    ret
   %1 = alloca i8, i64 %n
   %2 = alloca
@@ -231,11 +243,11 @@
 ; RV64IV-NEXT:    .cfi_offset s1, -24
 ; RV64IV-NEXT:    addi s0, sp, 256
 ; RV64IV-NEXT:    .cfi_def_cfa s0, 0
-; RV64IV-NEXT:    andi sp, sp, -128
-; RV64IV-NEXT:    mv s1, sp
 ; RV64IV-NEXT:    csrr a1, vlenb
 ; RV64IV-NEXT:    slli a1, a1, 2
 ; RV64IV-NEXT:    sub sp, sp, a1
+; RV64IV-NEXT:    andi sp, sp, -128
+; RV64IV-NEXT:    mv s1, sp
 ; RV64IV-NEXT:    addi a0, a0, 15
 ; RV64IV-NEXT:    andi a0, a0, -16
 ; RV64IV-NEXT:    sub a0, sp, a0
@@ -243,17 +255,16 @@
 ; RV64IV-NEXT:    addi a1, s1, 128
 ; RV64IV-NEXT:    csrr a2, vlenb
 ; RV64IV-NEXT:    slli a2, a2, 1
-; RV64IV-NEXT:    sub a2, s1, a2
-; RV64IV-NEXT:    mv a2, a2
+; RV64IV-NEXT:    add a2, s1, a2
+; RV64IV-NEXT:    addi a2, a2, 224
 ; RV64IV-NEXT:    call notdead2@plt
 ; RV64IV-NEXT:    lw a0, 124(s1)
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 1
-; RV64IV-NEXT:    sub a0, s1, a0
+; RV64IV-NEXT:    add a0, s1, a0
+; RV64IV-NEXT:    addi a0, a0, 224
 ; RV64IV-NEXT:    vl2r.v v26, (a0)
-; RV64IV-NEXT:    csrr a0, vlenb
-; RV64IV-NEXT:    slli a0, a0, 2
-; RV64IV-NEXT:    sub a0, s1, a0
+; RV64IV-NEXT:    addi a0, s1, 224
 ; RV64IV-NEXT:    vl2r.v v26, (a0)
 ; RV64IV-NEXT:    lw a0, 120(s1)
 ; RV64IV-NEXT:    addi sp, s0, -256
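In localvar.ll the frame-pointer cases now address RVV objects downward from s0, past the scalar frame. From local_var_m2_with_varsize_object above, the first m2 object resolves as

    csrr a0, vlenb ; slli a0, a0, 1 ; sub a0, s0, a0 ; addi a0, a0, -32   (s0 - 2 * vlenb - 32)

The scalar frame grows from 16 to 32 bytes (every -16 becoming -32) because, in addition to ra and s0, it now holds the emergency scavenging slot. In the base-pointer case at the end of the file, fixed objects are reached from s1 while RVV objects sit at s1 + 224 + k * vlenb.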
diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
--- a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
@@ -34,25 +34,29 @@
 ; RV64IV-NEXT:    .cfi_offset s0, -16
 ; RV64IV-NEXT:    addi s0, sp, 64
 ; RV64IV-NEXT:    .cfi_def_cfa s0, 0
-; RV64IV-NEXT:    andi sp, sp, -64
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 5
 ; RV64IV-NEXT:    sub sp, sp, a0
+; RV64IV-NEXT:    andi sp, sp, -64
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    addi a1, zero, 24
 ; RV64IV-NEXT:    mul a0, a0, a1
 ; RV64IV-NEXT:    add a0, sp, a0
+; RV64IV-NEXT:    addi a0, a0, 48
 ; RV64IV-NEXT:    vl8r.v v8, (a0)
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 4
 ; RV64IV-NEXT:    add a0, sp, a0
+; RV64IV-NEXT:    addi a0, a0, 48
 ; RV64IV-NEXT:    vl8r.v v16, (a0)
 ; RV64IV-NEXT:    csrr a0, vlenb
 ; RV64IV-NEXT:    slli a0, a0, 3
 ; RV64IV-NEXT:    add a0, sp, a0
+; RV64IV-NEXT:    addi a0, a0, 48
 ; RV64IV-NEXT:    vl8r.v v24, (a0)
-; RV64IV-NEXT:    mv a0, sp
-; RV64IV-NEXT:    vs8r.v v24, (sp)
+; RV64IV-NEXT:    addi a0, sp, 48
+; RV64IV-NEXT:    addi a1, sp, 48
+; RV64IV-NEXT:    vs8r.v v24, (a1)
 ; RV64IV-NEXT:    call callee@plt
 ; RV64IV-NEXT:    addi sp, s0, -64
 ; RV64IV-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -14,10 +14,7 @@
 ; SPILL-O0-NEXT:    csrr a1, vlenb
 ; SPILL-O0-NEXT:    slli a1, a1, 1
 ; SPILL-O0-NEXT:    sub sp, sp, a1
-; SPILL-O0-NEXT:    csrr a1, vlenb
-; SPILL-O0-NEXT:    slli a1, a1, 1
-; SPILL-O0-NEXT:    add a1, sp, a1
-; SPILL-O0-NEXT:    sw a0, 8(a1) # 4-byte Folded Spill
+; SPILL-O0-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
 ; SPILL-O0-NEXT:    csrr a1, vlenb
 ; SPILL-O0-NEXT:    add a1, sp, a1
 ; SPILL-O0-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
@@ -32,10 +29,7 @@
 ; SPILL-O0-NEXT:    add a1, sp, a1
 ; SPILL-O0-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT:    # kill: def $x11 killed $x10
-; SPILL-O0-NEXT:    csrr a0, vlenb
-; SPILL-O0-NEXT:    slli a0, a0, 1
-; SPILL-O0-NEXT:    add a0, sp, a0
-; SPILL-O0-NEXT:    lw a0, 8(a0) # 4-byte Folded Reload
+; SPILL-O0-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
 ; SPILL-O0-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
 ; SPILL-O0-NEXT:    vfadd.vv v8, v8, v25
 ; SPILL-O0-NEXT:    csrr a0, vlenb
@@ -49,7 +43,7 @@
 ; SPILL-O2:       # %bb.0:
 ; SPILL-O2-NEXT:    addi sp, sp, -16
 ; SPILL-O2-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; SPILL-O2-NEXT:    sw s0, 8(sp)  # 4-byte Folded Spill
+; SPILL-O2-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; SPILL-O2-NEXT:    csrr a1, vlenb
 ; SPILL-O2-NEXT:    slli a1, a1, 1
 ; SPILL-O2-NEXT:    sub sp, sp, a1
@@ -84,4 +78,4 @@
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i32 %gvl)
-declare i32 @puts(i8*);
+declare i32 @puts(i8*);
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
@@ -7,26 +7,34 @@
 define @spill_lmul_mf2( %va) nounwind {
 ; SPILL-O0-LABEL: spill_lmul_mf2:
 ; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    addi sp, sp, -16
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    sub sp, sp, a0
-; SPILL-O0-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O0-NEXT:    #APP
 ; SPILL-O0-NEXT:    #NO_APP
-; SPILL-O0-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    addi sp, sp, 16
 ; SPILL-O0-NEXT:    ret
 ;
 ; SPILL-O2-LABEL: spill_lmul_mf2:
 ; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    sub sp, sp, a0
-; SPILL-O2-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT:    #APP
 ; SPILL-O2-NEXT:    #NO_APP
-; SPILL-O2-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
 ; SPILL-O2-NEXT:    ret
 entry:
   call void asm sideeffect "",
@@ -38,26 +46,34 @@
 define @spill_lmul_1( %va) nounwind {
 ; SPILL-O0-LABEL: spill_lmul_1:
 ; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    addi sp, sp, -16
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    sub sp, sp, a0
-; SPILL-O0-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O0-NEXT:    #APP
 ; SPILL-O0-NEXT:    #NO_APP
-; SPILL-O0-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    addi sp, sp, 16
 ; SPILL-O0-NEXT:    ret
 ;
 ; SPILL-O2-LABEL: spill_lmul_1:
 ; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    sub sp, sp, a0
-; SPILL-O2-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT:    #APP
 ; SPILL-O2-NEXT:    #NO_APP
-; SPILL-O2-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
 ; SPILL-O2-NEXT:    ret
 entry:
   call void asm sideeffect "",
@@ -69,30 +85,38 @@
 define @spill_lmul_2( %va) nounwind {
 ; SPILL-O0-LABEL: spill_lmul_2:
 ; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    addi sp, sp, -16
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 1
 ; SPILL-O0-NEXT:    sub sp, sp, a0
-; SPILL-O0-NEXT:    vs2r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O0-NEXT:    #APP
 ; SPILL-O0-NEXT:    #NO_APP
-; SPILL-O0-NEXT:    vl2re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vl2re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 1
 ; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    addi sp, sp, 16
 ; SPILL-O0-NEXT:    ret
 ;
 ; SPILL-O2-LABEL: spill_lmul_2:
 ; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 1
 ; SPILL-O2-NEXT:    sub sp, sp, a0
-; SPILL-O2-NEXT:    vs2r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT:    #APP
 ; SPILL-O2-NEXT:    #NO_APP
-; SPILL-O2-NEXT:    vl2re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vl2re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 1
 ; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
 ; SPILL-O2-NEXT:    ret
 entry:
   call void asm sideeffect "",
@@ -104,30 +128,38 @@
 define @spill_lmul_4( %va) nounwind {
 ; SPILL-O0-LABEL: spill_lmul_4:
 ; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    addi sp, sp, -16
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 2
 ; SPILL-O0-NEXT:    sub sp, sp, a0
-; SPILL-O0-NEXT:    vs4r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vs4r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O0-NEXT:    #APP
 ; SPILL-O0-NEXT:    #NO_APP
-; SPILL-O0-NEXT:    vl4re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vl4re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 2
 ; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    addi sp, sp, 16
 ; SPILL-O0-NEXT:    ret
 ;
 ; SPILL-O2-LABEL: spill_lmul_4:
 ; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 2
 ; SPILL-O2-NEXT:    sub sp, sp, a0
-; SPILL-O2-NEXT:    vs4r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs4r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT:    #APP
 ; SPILL-O2-NEXT:    #NO_APP
-; SPILL-O2-NEXT:    vl4re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vl4re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 2
 ; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
 ; SPILL-O2-NEXT:    ret
 entry:
   call void asm sideeffect "",
@@ -139,30 +171,38 @@
 define @spill_lmul_8( %va) nounwind {
 ; SPILL-O0-LABEL: spill_lmul_8:
 ; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    addi sp, sp, -16
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 3
 ; SPILL-O0-NEXT:    sub sp, sp, a0
-; SPILL-O0-NEXT:    vs8r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O0-NEXT:    #APP
 ; SPILL-O0-NEXT:    #NO_APP
-; SPILL-O0-NEXT:    vl8re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 3
 ; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    addi sp, sp, 16
 ; SPILL-O0-NEXT:    ret
 ;
 ; SPILL-O2-LABEL: spill_lmul_8:
 ; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 3
 ; SPILL-O2-NEXT:    sub sp, sp, a0
-; SPILL-O2-NEXT:    vs8r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT:    #APP
 ; SPILL-O2-NEXT:    #NO_APP
-; SPILL-O2-NEXT:    vl8re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 3
 ; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
 ; SPILL-O2-NEXT:    ret
 entry:
   call void asm sideeffect "",
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -9,56 +9,56 @@
 define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double> %b, <vscale x 1 x double> %c, i64 %gvl) nounwind
 ; SPILL-O0-LABEL: foo:
 ; SPILL-O0:       # %bb.0:
-; SPILL-O0-NEXT:    addi sp, sp, -16
-; SPILL-O0-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; SPILL-O0-NEXT:    addi sp, sp, -32
+; SPILL-O0-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
 ; SPILL-O0-NEXT:    csrr a1, vlenb
 ; SPILL-O0-NEXT:    slli a1, a1, 1
 ; SPILL-O0-NEXT:    sub sp, sp, a1
-; SPILL-O0-NEXT:    csrr a1, vlenb
-; SPILL-O0-NEXT:    slli a1, a1, 1
-; SPILL-O0-NEXT:    add a1, sp, a1
-; SPILL-O0-NEXT:    sd a0, 0(a1) # 8-byte Folded Spill
+; SPILL-O0-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
 ; SPILL-O0-NEXT:    csrr a1, vlenb
 ; SPILL-O0-NEXT:    add a1, sp, a1
+; SPILL-O0-NEXT:    addi a1, a1, 16
 ; SPILL-O0-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
 ; SPILL-O0-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
 ; SPILL-O0-NEXT:    vfadd.vv v25, v8, v9
-; SPILL-O0-NEXT:    vs1r.v v25, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vs1r.v v25, (a0) # Unknown-size Folded Spill
 ; SPILL-O0-NEXT:    lui a0, %hi(.L.str)
 ; SPILL-O0-NEXT:    addi a0, a0, %lo(.L.str)
 ; SPILL-O0-NEXT:    call puts@plt
-; SPILL-O0-NEXT:    vl1r.v v25, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    addi a1, sp, 16
+; SPILL-O0-NEXT:    vl1r.v v25, (a1) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT:    csrr a1, vlenb
 ; SPILL-O0-NEXT:    add a1, sp, a1
+; SPILL-O0-NEXT:    addi a1, a1, 16
 ; SPILL-O0-NEXT:    vl1r.v v8, (a1) # Unknown-size Folded Reload
-; SPILL-O0-NEXT:    # kill: def $x11 killed $x10
-; SPILL-O0-NEXT:    csrr a0, vlenb
-; SPILL-O0-NEXT:    slli a0, a0, 1
-; SPILL-O0-NEXT:    add a0, sp, a0
-; SPILL-O0-NEXT:    ld a0, 0(a0) # 8-byte Folded Reload
+; SPILL-O0-NEXT:    # kill: def $x11 killed $x10
+; SPILL-O0-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
 ; SPILL-O0-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
 ; SPILL-O0-NEXT:    vfadd.vv v8, v8, v25
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 1
 ; SPILL-O0-NEXT:    add sp, sp, a0
-; SPILL-O0-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; SPILL-O0-NEXT:    addi sp, sp, 16
+; SPILL-O0-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; SPILL-O0-NEXT:    addi sp, sp, 32
 ; SPILL-O0-NEXT:    ret
 ;
 ; SPILL-O2-LABEL: foo:
 ; SPILL-O2:       # %bb.0:
-; SPILL-O2-NEXT:    addi sp, sp, -16
-; SPILL-O2-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; SPILL-O2-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; SPILL-O2-NEXT:    addi sp, sp, -32
+; SPILL-O2-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; SPILL-O2-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; SPILL-O2-NEXT:    csrr a1, vlenb
 ; SPILL-O2-NEXT:    slli a1, a1, 1
 ; SPILL-O2-NEXT:    sub sp, sp, a1
 ; SPILL-O2-NEXT:    mv s0, a0
-; SPILL-O2-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a1, sp, 16
+; SPILL-O2-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
 ; SPILL-O2-NEXT:    vfadd.vv v25, v8, v9
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
 ; SPILL-O2-NEXT:    vs1r.v v25, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT:    lui a0, %hi(.L.str)
 ; SPILL-O2-NEXT:    addi a0, a0, %lo(.L.str)
@@ -66,15 +66,17 @@
 ; SPILL-O2-NEXT:    vsetvli a0, s0, e64,m1,ta,mu
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    add a0, sp, a0
+; SPILL-O2-NEXT:    addi a0, a0, 16
 ; SPILL-O2-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
-; SPILL-O2-NEXT:    vl1r.v v26, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT:    vfadd.vv v8, v26, v25
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 1
 ; SPILL-O2-NEXT:    add sp, sp, a0
-; SPILL-O2-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
-; SPILL-O2-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; SPILL-O2-NEXT:    addi sp, sp, 16
+; SPILL-O2-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; SPILL-O2-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; SPILL-O2-NEXT:    addi sp, sp, 32
 ; SPILL-O2-NEXT:    ret
 {
   %x = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %gvl)
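The -O0 churn in the two spill-vector-csr tests is the same layout change seen from the register allocator's side: the GPR spill of a0 moves from above the RVV area (three address-setup instructions to form sp + 2 * vlenb + offset) down into the scalar locals at a fixed SP offset, while the unknown-size vector spill slots shift up by the 16 scalar bytes, i.e. slot k now lives at

    sp + 16 + k * vlenb

The larger scalar frame (-32 rather than -16) holds the relocated GPR slot and the emergency scavenging slot alongside ra/s0.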
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
@@ -7,26 +7,34 @@
 define @spill_lmul_1( %va) nounwind {
 ; SPILL-O0-LABEL: spill_lmul_1:
 ; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    addi sp, sp, -16
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    sub sp, sp, a0
-; SPILL-O0-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O0-NEXT:    #APP
 ; SPILL-O0-NEXT:    #NO_APP
-; SPILL-O0-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    addi sp, sp, 16
 ; SPILL-O0-NEXT:    ret
 ;
 ; SPILL-O2-LABEL: spill_lmul_1:
 ; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    sub sp, sp, a0
-; SPILL-O2-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT:    #APP
 ; SPILL-O2-NEXT:    #NO_APP
-; SPILL-O2-NEXT:    vl1r.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
 ; SPILL-O2-NEXT:    ret
 entry:
   call void asm sideeffect "",
@@ -38,30 +46,38 @@
 define @spill_lmul_2( %va) nounwind {
 ; SPILL-O0-LABEL: spill_lmul_2:
 ; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    addi sp, sp, -16
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 1
 ; SPILL-O0-NEXT:    sub sp, sp, a0
-; SPILL-O0-NEXT:    vs2r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O0-NEXT:    #APP
 ; SPILL-O0-NEXT:    #NO_APP
-; SPILL-O0-NEXT:    vl2re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vl2re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 1
 ; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    addi sp, sp, 16
 ; SPILL-O0-NEXT:    ret
 ;
 ; SPILL-O2-LABEL: spill_lmul_2:
 ; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 1
 ; SPILL-O2-NEXT:    sub sp, sp, a0
-; SPILL-O2-NEXT:    vs2r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT:    #APP
 ; SPILL-O2-NEXT:    #NO_APP
-; SPILL-O2-NEXT:    vl2re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vl2re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 1
 ; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
 ; SPILL-O2-NEXT:    ret
 entry:
   call void asm sideeffect "",
@@ -73,30 +89,38 @@
 define @spill_lmul_4( %va) nounwind {
 ; SPILL-O0-LABEL: spill_lmul_4:
 ; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    addi sp, sp, -16
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 2
 ; SPILL-O0-NEXT:    sub sp, sp, a0
-; SPILL-O0-NEXT:    vs4r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vs4r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O0-NEXT:    #APP
 ; SPILL-O0-NEXT:    #NO_APP
-; SPILL-O0-NEXT:    vl4re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vl4re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 2
 ; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    addi sp, sp, 16
 ; SPILL-O0-NEXT:    ret
 ;
 ; SPILL-O2-LABEL: spill_lmul_4:
 ; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 2
 ; SPILL-O2-NEXT:    sub sp, sp, a0
-; SPILL-O2-NEXT:    vs4r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs4r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT:    #APP
 ; SPILL-O2-NEXT:    #NO_APP
-; SPILL-O2-NEXT:    vl4re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vl4re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 2
 ; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
 ; SPILL-O2-NEXT:    ret
 entry:
   call void asm sideeffect "",
@@ -108,30 +132,38 @@
 define @spill_lmul_8( %va) nounwind {
 ; SPILL-O0-LABEL: spill_lmul_8:
 ; SPILL-O0:       # %bb.0: # %entry
+; SPILL-O0-NEXT:    addi sp, sp, -16
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 3
 ; SPILL-O0-NEXT:    sub sp, sp, a0
-; SPILL-O0-NEXT:    vs8r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O0-NEXT:    #APP
 ; SPILL-O0-NEXT:    #NO_APP
-; SPILL-O0-NEXT:    vl8re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O0-NEXT:    addi a0, sp, 16
+; SPILL-O0-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O0-NEXT:    csrr a0, vlenb
 ; SPILL-O0-NEXT:    slli a0, a0, 3
 ; SPILL-O0-NEXT:    add sp, sp, a0
+; SPILL-O0-NEXT:    addi sp, sp, 16
 ; SPILL-O0-NEXT:    ret
 ;
 ; SPILL-O2-LABEL: spill_lmul_8:
 ; SPILL-O2:       # %bb.0: # %entry
+; SPILL-O2-NEXT:    addi sp, sp, -16
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 3
 ; SPILL-O2-NEXT:    sub sp, sp, a0
-; SPILL-O2-NEXT:    vs8r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT:    #APP
 ; SPILL-O2-NEXT:    #NO_APP
-; SPILL-O2-NEXT:    vl8re8.v v8, (sp) # Unknown-size Folded Reload
+; SPILL-O2-NEXT:    addi a0, sp, 16
+; SPILL-O2-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 3
 ; SPILL-O2-NEXT:    add sp, sp, a0
+; SPILL-O2-NEXT:    addi sp, sp, 16
 ; SPILL-O2-NEXT:    ret
 entry:
   call void asm sideeffect "",
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
@@ -0,0 +1,137 @@
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+m -verify-machineinstrs < %s | FileCheck %s
+
+define void @rvv_vla(i64 %n, i64 %i) nounwind {
+; CHECK-LABEL: rvv_vla:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -32
+; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi s0, sp, 32
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    addi a3, zero, 3
+; CHECK-NEXT:    mul a2, a2, a3
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, 15
+; CHECK-NEXT:    andi a0, a0, -16
+; CHECK-NEXT:    sub a0, sp, a0
+; CHECK-NEXT:    mv sp, a0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    sub a2, s0, a2
+; CHECK-NEXT:    addi a2, a2, -32
+; CHECK-NEXT:    vl1re64.v v25, (a2)
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    addi a3, zero, 3
+; CHECK-NEXT:    mul a2, a2, a3
+; CHECK-NEXT:    sub a2, s0, a2
+; CHECK-NEXT:    addi a2, a2, -32
+; CHECK-NEXT:    vl2re64.v v26, (a2)
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    lw a0, 0(a0)
+; CHECK-NEXT:    addi sp, s0, -32
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    ret
+  %vla.addr = alloca i32, i64 %n
+
+  %v1.addr = alloca <vscale x 1 x i64>
+  %v1 = load volatile <vscale x 1 x i64>, <vscale x 1 x i64>* %v1.addr
+
+  %v2.addr = alloca <vscale x 2 x i64>
+  %v2 = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %v2.addr
+
+  %p = getelementptr i32, i32* %vla.addr, i64 %i
+  %s = load volatile i32, i32* %p
+  ret void
+}
+
+define void @rvv_overaligned() nounwind {
+; CHECK-LABEL: rvv_overaligned:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -128
+; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi s0, sp, 128
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    addi a1, zero, 3
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    andi sp, sp, -64
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 112
+; CHECK-NEXT:    vl1re64.v v25, (a0)
+; CHECK-NEXT:    addi a0, sp, 112
+; CHECK-NEXT:    vl2re64.v v26, (a0)
+; CHECK-NEXT:    lw a0, 64(sp)
+; CHECK-NEXT:    addi sp, s0, -128
+; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 128
+; CHECK-NEXT:    ret
+  %overaligned = alloca i32, align 64
+
+  %v1.addr = alloca <vscale x 1 x i64>
+  %v1 = load volatile <vscale x 1 x i64>, <vscale x 1 x i64>* %v1.addr
+
+  %v2.addr = alloca <vscale x 2 x i64>
+  %v2 = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %v2.addr
+
+  %s = load volatile i32, i32* %overaligned, align 64
+  ret void
+}
+
+define void @rvv_vla_and_overaligned(i64 %n, i64 %i) nounwind {
+; CHECK-LABEL: rvv_vla_and_overaligned
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -128
+; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi s0, sp, 128
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    addi a3, zero, 3
+; CHECK-NEXT:    mul a2, a2, a3
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    andi sp, sp, -64
+; CHECK-NEXT:    mv s1, sp
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    addi a0, a0, 15
+; CHECK-NEXT:    andi a0, a0, -16
+; CHECK-NEXT:    sub a0, sp, a0
+; CHECK-NEXT:    mv sp, a0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    add a2, s1, a2
+; CHECK-NEXT:    addi a2, a2, 96
+; CHECK-NEXT:    vl1re64.v v25, (a2)
+; CHECK-NEXT:    addi a2, s1, 96
+; CHECK-NEXT:    vl2re64.v v26, (a2)
+; CHECK-NEXT:    lw a2, 64(s1)
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    lw a0, 0(a0)
+; CHECK-NEXT:    addi sp, s0, -128
+; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 128
+; CHECK-NEXT:    ret
+  %overaligned = alloca i32, align 64
+  %vla.addr = alloca i32, i64 %n
+
+  %v1.addr = alloca <vscale x 1 x i64>
+  %v1 = load volatile <vscale x 1 x i64>, <vscale x 1 x i64>* %v1.addr
+
+  %v2.addr = alloca <vscale x 2 x i64>
+  %v2 = load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %v2.addr
+
+  %s1 = load volatile i32, i32* %overaligned, align 64
+  %p = getelementptr i32, i32* %vla.addr, i64 %i
+  %s2 = load volatile i32, i32* %p
+  ret void
+
+}
\ No newline at end of file
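The new rvv-framelayout.ll test pins down the three awkward layout combinations: rvv_vla (a VLA forces a frame pointer, so RVV objects are addressed from s0 while the VLA is reached through the moving SP), rvv_overaligned (stack realignment, so RVV objects are addressed from the realigned SP with the overaligned scalar below them), and rvv_vla_and_overaligned (both at once, so the base pointer s1 anchors the fixed region). In the last function the three kinds of object resolve to three different base registers in one frame:

    m1 RVV object:   s1 + 2 * vlenb + 96
    m2 RVV object:   s1 + 96
    overaligned i32: 64(s1)
    VLA element:     through the SP-derived pointer in a0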
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll
@@ -1979,9 +1979,11 @@
 define <vscale x 8 x i64> @intrinsic_vaadd_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrrs a3, vlenb, zero
 ; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    vs1r.v v0, (sp) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
 ; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    addi a1, zero, 32
@@ -1991,10 +1993,12 @@
 ; CHECK-NEXT:    vsrl.vx v24, v24, a1
 ; CHECK-NEXT:    vor.vv v24, v24, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    vl1re8.v v0, (sp) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vaadd.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    csrrs a0, vlenb, zero
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vaadd.mask.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll
@@ -1979,9 +1979,11 @@
 define <vscale x 8 x i64> @intrinsic_vaaddu_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrrs a3, vlenb, zero
 ; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    vs1r.v v0, (sp) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
 ; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    addi a1, zero, 32
@@ -1991,10 +1993,12 @@
 ; CHECK-NEXT:    vsrl.vx v24, v24, a1
 ; CHECK-NEXT:    vor.vv v24, v24, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    vl1re8.v v0, (sp) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vaaddu.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    csrrs a0, vlenb, zero
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vaaddu.mask.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll
@@ -1979,9 +1979,11 @@
 define <vscale x 8 x i64> @intrinsic_vadd_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vadd_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrrs a3, vlenb, zero
 ; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    vs1r.v v0, (sp) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
 ; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    addi a1, zero, 32
@@ -1991,10 +1993,12 @@
 ; CHECK-NEXT:    vsrl.vx v24, v24, a1
 ; CHECK-NEXT:    vor.vv v24, v24, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    vl1re8.v v0, (sp) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vadd.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    csrrs a0, vlenb, zero
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vadd.mask.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll
@@ -1979,9 +1979,11 @@
 define <vscale x 8 x i64> @intrinsic_vand_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vand_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrrs a3, vlenb, zero
 ; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    vs1r.v v0, (sp) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
 ; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    addi a1, zero, 32
@@ -1991,10 +1993,12 @@
 ; CHECK-NEXT:    vsrl.vx v24, v24, a1
 ; CHECK-NEXT:    vor.vv v24, v24, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    vl1re8.v v0, (sp) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vand.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    csrrs a0, vlenb, zero
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vand.mask.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll
@@ -1979,9 +1979,11 @@
 define <vscale x 8 x i64> @intrinsic_vasub_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vasub_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrrs a3, vlenb, zero
 ; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    vs1r.v v0, (sp) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
 ; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    addi a1, zero, 32
@@ -1991,10 +1993,12 @@
 ; CHECK-NEXT:    vsrl.vx v24, v24, a1
 ; CHECK-NEXT:    vor.vv v24, v24, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    vl1re8.v v0, (sp) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vasub.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    csrrs a0, vlenb, zero
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vasub.mask.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll
@@ -1979,9 +1979,11 @@
 define <vscale x 8 x i64> @intrinsic_vasubu_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vasubu_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrrs a3, vlenb, zero
 ; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    vs1r.v v0, (sp) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
 ; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    addi a1, zero, 32
@@ -1991,10 +1993,12 @@
 ; CHECK-NEXT:    vsrl.vx v24, v24, a1
 ; CHECK-NEXT:    vor.vv v24, v24, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    vl1re8.v v0, (sp) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vasubu.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    csrrs a0, vlenb, zero
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vasubu.mask.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll
@@ -1979,9 +1979,11 @@
 define <vscale x 8 x i64> @intrinsic_vssra_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vssra_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrrs a3, vlenb, zero
 ; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    vs1r.v v0, (sp) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
 ; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    addi a1, zero, 32
@@ -1991,10 +1993,12 @@
 ; CHECK-NEXT:    vsrl.vx v24, v24, a1
 ; CHECK-NEXT:    vor.vv v24, v24, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    vl1re8.v v0, (sp) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vssra.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    csrrs a0, vlenb, zero
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vssra.mask.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll
@@ -1979,9 +1979,11 @@
 define <vscale x 8 x i64> @intrinsic_vssrl_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vssrl_mask_vx_nxv8i64_nxv8i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    csrrs a3, vlenb, zero
 ; CHECK-NEXT:    sub sp, sp, a3
-; CHECK-NEXT:    vs1r.v v0, (sp) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli a3, a2, e64,m8,ta,mu
 ; CHECK-NEXT:    vmv.v.x v24, a1
 ; CHECK-NEXT:    addi a1, zero, 32
@@ -1991,10 +1993,12 @@
 ; CHECK-NEXT:    vsrl.vx v24, v24, a1
 ; CHECK-NEXT:    vor.vv v24, v24, v0
 ; CHECK-NEXT:    vsetvli a0, a2, e64,m8,tu,mu
-; CHECK-NEXT:    vl1re8.v v0, (sp) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1re8.v v0, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vssrl.vv v8, v16, v24, v0.t
 ; CHECK-NEXT:    csrrs a0, vlenb, zero
 ; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    jalr zero, 0(ra)
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vssrl.mask.nxv8i64.i64(