diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -68,6 +68,9 @@ bool isSupportedStackID(TargetStackID::Value ID) const override; TargetStackID::Value getStackIDForScalableVectors() const override; + void processFunctionBeforeFrameIndicesReplaced( + MachineFunction &MF, RegScavenger *RS = nullptr) const override; + protected: const RISCVSubtarget &STI; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -480,6 +480,9 @@ } } + if (RVVStackSize) + adjustStackForRVV(MF, MBB, MBBI, DL, -RVVStackSize); + if (hasFP(MF)) { // Realign Stack const RISCVRegisterInfo *RI = STI.getRegisterInfo(); @@ -511,9 +514,6 @@ } } } - - if (RVVStackSize) - adjustStackForRVV(MF, MBB, MBBI, DL, -RVVStackSize); } void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, @@ -645,35 +645,36 @@ if (hasBP(MF)) { FrameReg = RISCVABI::getBPReg(); // |--------------------------| -- <-- FP - // | callee-saved registers | | - // |--------------------------| | MFI.getStackSize() - // | scalar local variables | | - // |--------------------------| -- - // | Realignment | | + // | callee-saved registers | | <---------. + // |--------------------------| -- | + // | RVV objects | | | + // |--------------------------| -- |-- MFI.getStackSize() + // | realignment | | | + // |--------------------------| -- | + // | scalar local variables | | <---------' // |--------------------------| -- <-- BP - // | RVV objects | | RVFI->getRVVStackSize() - // |--------------------------| -- // | VarSize objects | | // |--------------------------| -- <-- SP } else { FrameReg = RISCV::X2; - // When using SP to access frame objects, we need to add RVV stack size. - // // |--------------------------| -- <-- FP - // | callee-saved registers | | - // |--------------------------| | MFI.getStackSize() - // | scalar local variables | | - // |--------------------------| -- - // | Realignment | | - // |--------------------------| -- - // | RVV objects | | RVFI->getRVVStackSize() + // | callee-saved registers | | <---------. + // |--------------------------| -- | + // | RVV objects | | | + // |--------------------------| -- |-- MFI.getStackSize() + // | realignment | | | + // |--------------------------| -- | + // | scalar local variables | | <---------' // |--------------------------| -- <-- SP - Offset += StackOffset::getScalable(RVFI->getRVVStackSize()); } if (MFI.getStackID(FI) == TargetStackID::Default) { Offset += StackOffset::getFixed(MFI.getStackSize()); if (FI < 0) Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); + } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { + Offset += + StackOffset::get(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), + RVFI->getRVVStackSize()); } } else { FrameReg = RI->getFrameRegister(MF); @@ -683,33 +684,37 @@ Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize()); // When using FP to access scalable vector objects, we need to minus // the frame size. - // // |--------------------------| -- <-- FP - // | callee-saved registers | | - // |--------------------------| | MFI.getStackSize() - // | scalar local variables | | - // |--------------------------| -- (Offset of RVV objects is from here.) - // | RVV objects | + // | callee-saved registers | |<--------. + // |--------------------------| -- | + // | RVV objects | | |-- MFI.getStackSize() + // |--------------------------| -- | + // | scalar local variables | |<--------' // |--------------------------| // | VarSize objects | // |--------------------------| <-- SP if (MFI.getStackID(FI) == TargetStackID::ScalableVector) - Offset -= StackOffset::getFixed(MFI.getStackSize()); + Offset -= StackOffset::getFixed(RVFI->getCalleeSavedStackSize()); + else if (MFI.getStackID(FI) == TargetStackID::Default) + Offset -= StackOffset::getScalable(RVFI->getRVVStackSize()); } else { // When using SP to access frame objects, we need to add RVV stack size. // // |--------------------------| -- <-- FP - // | callee-saved registers | | - // |--------------------------| | MFI.getStackSize() - // | scalar local variables | | - // |--------------------------| -- - // | RVV objects | | RVFI->getRVVStackSize() + // | callee-saved registers | |<--------. + // |--------------------------| -- | + // | RVV objects | | |-- MFI.getStackSize() + // |--------------------------| -- | + // | scalar local variables | |<--------' // |--------------------------| -- <-- SP - Offset += StackOffset::getScalable(RVFI->getRVVStackSize()); if (MFI.getStackID(FI) == TargetStackID::Default) { Offset += StackOffset::getFixed(MFI.getStackSize()); if (FI < 0) Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); + } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { + Offset += StackOffset::get(MFI.getStackSize() - + RVFI->getCalleeSavedStackSize(), + RVFI->getRVVStackSize()); } } } @@ -798,21 +803,46 @@ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterClass *RC = &RISCV::GPRRegClass; + + auto *RVFI = MF.getInfo(); + int64_t RVVStackSize = assignRVVStackObjectOffsets(MFI); + RVFI->setRVVStackSize(RVVStackSize); + // estimateStackSize has been observed to under-estimate the final stack // size, so give ourselves wiggle-room by checking for stack size // representable an 11-bit signed field rather than 12-bits. // FIXME: It may be possible to craft a function with a small stack that // still needs an emergency spill slot for branch relaxation. This case // would currently be missed. - if (!isInt<11>(MFI.estimateStackSize(MF))) { + if (!isInt<11>(MFI.estimateStackSize(MF)) || RVVStackSize != 0) { int RegScavFI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC), RegInfo->getSpillAlign(*RC), false); RS->addScavengingFrameIndex(RegScavFI); } +} +void RISCVFrameLowering::processFunctionBeforeFrameIndicesReplaced( + MachineFunction &MF, RegScavenger *RS) const { auto *RVFI = MF.getInfo(); - int64_t RVVStackSize = assignRVVStackObjectOffsets(MFI); - RVFI->setRVVStackSize(RVVStackSize); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF)) { + RVFI->setCalleeSavedStackSize(0); + return; + } + int64_t MinOffset = std::numeric_limits::max(); + int64_t MaxOffset = std::numeric_limits::min(); + for (const auto &Info : MFI.getCalleeSavedInfo()) { + int FrameIdx = Info.getFrameIdx(); + if (MFI.getStackID(FrameIdx) != TargetStackID::Default) + continue; + int64_t Offset = MFI.getObjectOffset(FrameIdx); + int64_t ObjSize = MFI.getObjectSize(FrameIdx); + MinOffset = std::min(Offset, MinOffset); + MaxOffset = std::max(Offset + ObjSize, MaxOffset); + } + + unsigned Size = alignTo(MaxOffset - MinOffset, 16); + RVFI->setCalleeSavedStackSize(Size); } // Not preserve stack space within prologue for outgoing variables when the diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -34,6 +34,8 @@ unsigned LibCallStackSize = 0; /// Size of RVV stack. uint64_t RVVStackSize = 0; + /// Size of stack frame to save callee saved registers + unsigned CalleeSavedStackSize = 0; public: RISCVMachineFunctionInfo(const MachineFunction &MF) {} @@ -63,6 +65,9 @@ uint64_t getRVVStackSize() const { return RVVStackSize; } void setRVVStackSize(uint64_t Size) { RVVStackSize = Size; } + + unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } + void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } }; } // end namespace llvm diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll --- a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll @@ -30,23 +30,20 @@ define @access_fixed_and_vector_objects(i64 *%val) { ; RV64IV-LABEL: access_fixed_and_vector_objects: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -528 +; RV64IV-NEXT: addi sp, sp, -528 ; RV64IV-NEXT: .cfi_def_cfa_offset 528 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: addi a0, a0, 8 -; RV64IV-NEXT: vl1re64.v v25, (a0) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: ld a0, 520(a0) -; RV64IV-NEXT: vl1re64.v v26, (sp) -; RV64IV-NEXT: vsetvli a0, a0, e64,m1,ta,mu -; RV64IV-NEXT: vadd.vv v8, v25, v26 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: add sp, sp, a0 -; RV64IV-NEXT: addi sp, sp, 528 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: addi a0, sp, 8 +; RV64IV-NEXT: vl1re64.v v25, (a0) +; RV64IV-NEXT: ld a0, 520(sp) +; RV64IV-NEXT: addi a1, sp, 528 +; RV64IV-NEXT: vl1re64.v v26, (a1) +; RV64IV-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; RV64IV-NEXT: vadd.vv v8, v25, v26 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: addi sp, sp, 528 ; RV64IV-NEXT: ret %local = alloca i64 %vector = alloca diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll --- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll +++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -5,10 +5,12 @@ define void @lmul1() nounwind { ; CHECK-LABEL: lmul1: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = alloca ret void @@ -17,12 +19,14 @@ define void @lmul2() nounwind { ; CHECK-LABEL: lmul2: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = alloca ret void @@ -31,18 +35,18 @@ define void @lmul4() nounwind { ; CHECK-LABEL: lmul4: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret %v = alloca ret void @@ -51,18 +55,18 @@ define void @lmul8() nounwind { ; CHECK-LABEL: lmul8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 -; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: addi sp, s0, -64 +; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret %v = alloca ret void @@ -71,14 +75,16 @@ define void @lmul1_and_2() nounwind { ; CHECK-LABEL: lmul1_and_2: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 3 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 3 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -88,19 +94,19 @@ define void @lmul2_and_4() nounwind { ; CHECK-LABEL: lmul2_and_4: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 6 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -110,19 +116,19 @@ define void @lmul1_and_4() nounwind { ; CHECK-LABEL: lmul1_and_4: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 5 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 5 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -132,14 +138,16 @@ define void @lmul2_and_1() nounwind { ; CHECK-LABEL: lmul2_and_1: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 3 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 3 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -149,19 +157,19 @@ define void @lmul4_and_1() nounwind { ; CHECK-LABEL: lmul4_and_1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 5 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 5 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -171,19 +179,19 @@ define void @lmul4_and_2() nounwind { ; CHECK-LABEL: lmul4_and_2: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 6 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -193,19 +201,19 @@ define void @lmul4_and_2_x2_0() nounwind { ; CHECK-LABEL: lmul4_and_2_x2_0: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 12 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -217,19 +225,19 @@ define void @lmul4_and_2_x2_1() nounwind { ; CHECK-LABEL: lmul4_and_2_x2_1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 12 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret %v1 = alloca %v3 = alloca @@ -242,22 +250,18 @@ define void @gpr_and_lmul1_and_2() nounwind { ; CHECK-LABEL: gpr_and_lmul1_and_2: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 3 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi a0, zero, 3 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: addi a2, zero, 3 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: sd a0, 8(a1) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 3 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, zero, 3 +; CHECK-NEXT: sd a0, 8(sp) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %x1 = alloca i64 %v1 = alloca @@ -269,25 +273,21 @@ define void @gpr_and_lmul1_and_4() nounwind { ; CHECK-LABEL: gpr_and_lmul1_and_4: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 32 -; CHECK-NEXT: andi sp, sp, -32 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 5 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi a0, zero, 3 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: addi a2, zero, 5 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: sd a0, 8(a1) -; CHECK-NEXT: addi sp, s0, -32 -; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 5 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: addi a0, zero, 3 +; CHECK-NEXT: sd a0, 8(sp) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret %x1 = alloca i64 %v1 = alloca @@ -299,19 +299,19 @@ define void @lmul_1_2_4_8() nounwind { ; CHECK-LABEL: lmul_1_2_4_8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 -; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 15 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 15 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: addi sp, s0, -64 +; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -323,19 +323,19 @@ define void @lmul_1_2_4_8_x2_0() nounwind { ; CHECK-LABEL: lmul_1_2_4_8_x2_0: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 -; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 30 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 30 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: addi sp, s0, -64 +; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca @@ -351,19 +351,19 @@ define void @lmul_1_2_4_8_x2_1() nounwind { ; CHECK-LABEL: lmul_1_2_4_8_x2_1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: addi s0, sp, 64 -; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a1, zero, 30 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: addi sp, s0, -64 -; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 30 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: addi sp, s0, -64 +; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret %v8 = alloca %v7 = alloca @@ -379,12 +379,14 @@ define void @masks() nounwind { ; CHECK-LABEL: masks: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v1 = alloca %v2 = alloca diff --git a/llvm/test/CodeGen/RISCV/rvv/localvar.ll b/llvm/test/CodeGen/RISCV/rvv/localvar.ll --- a/llvm/test/CodeGen/RISCV/rvv/localvar.ll +++ b/llvm/test/CodeGen/RISCV/rvv/localvar.ll @@ -5,19 +5,19 @@ define void @local_var_mf8() { ; RV64IV-LABEL: local_var_mf8: ; RV64IV: # %bb.0: -; RV64IV-NEXT: .cfi_def_cfa_offset 0 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 1 -; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: vsetvli a0, zero, e8,mf8,ta,mu -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: vle8.v v25, (a0) -; RV64IV-NEXT: vle8.v v25, (sp) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 1 -; RV64IV-NEXT: add sp, sp, a0 -; RV64IV-NEXT: ret +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: vle8.v v25, (a0) +; CHECK-NEXT: vle8.v v25, (sp) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret %local0 = alloca %local1 = alloca load volatile , * %local0 @@ -28,18 +28,18 @@ define void @local_var_m1() { ; RV64IV-LABEL: local_var_m1: ; RV64IV: # %bb.0: -; RV64IV-NEXT: .cfi_def_cfa_offset 0 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 1 -; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: vl1r.v v25, (a0) -; RV64IV-NEXT: vl1r.v v25, (sp) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 1 -; RV64IV-NEXT: add sp, sp, a0 -; RV64IV-NEXT: ret +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: vl1r.v v25, (a0) +; CHECK-NEXT: vl1r.v v25, (sp) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret %local0 = alloca %local1 = alloca load volatile , * %local0 @@ -50,19 +50,19 @@ define void @local_var_m2() { ; RV64IV-LABEL: local_var_m2: ; RV64IV: # %bb.0: -; RV64IV-NEXT: .cfi_def_cfa_offset 0 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 2 -; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 1 -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: vl2r.v v26, (a0) -; RV64IV-NEXT: vl2r.v v26, (sp) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 2 -; RV64IV-NEXT: add sp, sp, a0 -; RV64IV-NEXT: ret +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: vl2r.v v26, (a0) +; CHECK-NEXT: vl2r.v v26, (sp) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret %local0 = alloca %local1 = alloca load volatile , * %local0 @@ -73,28 +73,30 @@ define void @local_var_m4() { ; RV64IV-LABEL: local_var_m4: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -32 -; RV64IV-NEXT: .cfi_def_cfa_offset 32 -; RV64IV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; RV64IV-NEXT: .cfi_offset ra, -8 -; RV64IV-NEXT: .cfi_offset s0, -16 -; RV64IV-NEXT: addi s0, sp, 32 -; RV64IV-NEXT: .cfi_def_cfa s0, 0 -; RV64IV-NEXT: andi sp, sp, -32 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 3 -; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 2 -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: vl4r.v v28, (a0) -; RV64IV-NEXT: vl4r.v v28, (sp) -; RV64IV-NEXT: addi sp, s0, -32 -; RV64IV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 32 -; RV64IV-NEXT: ret +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: .cfi_def_cfa s0, 0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl4r.v v28, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl4r.v v28, (a0) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret %local0 = alloca %local1 = alloca load volatile , * %local0 @@ -105,28 +107,30 @@ define void @local_var_m8() { ; RV64IV-LABEL: local_var_m8: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -64 -; RV64IV-NEXT: .cfi_def_cfa_offset 64 -; RV64IV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64IV-NEXT: .cfi_offset ra, -8 -; RV64IV-NEXT: .cfi_offset s0, -16 -; RV64IV-NEXT: addi s0, sp, 64 -; RV64IV-NEXT: .cfi_def_cfa s0, 0 -; RV64IV-NEXT: andi sp, sp, -64 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 4 -; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 3 -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: vl8r.v v8, (a0) -; RV64IV-NEXT: vl8r.v v8, (sp) -; RV64IV-NEXT: addi sp, s0, -64 -; RV64IV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 64 -; RV64IV-NEXT: ret +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: .cfi_def_cfa s0, 0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 48 +; CHECK-NEXT: vl8r.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 48 +; CHECK-NEXT: vl8r.v v8, (a0) +; CHECK-NEXT: addi sp, s0, -64 +; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: ret %local0 = alloca %local1 = alloca load volatile , * %local0 @@ -137,29 +141,25 @@ define void @local_var_m2_mix_local_scalar() { ; RV64IV-LABEL: local_var_m2_mix_local_scalar: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -16 -; RV64IV-NEXT: .cfi_def_cfa_offset 16 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 2 -; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 2 -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: lw a0, 12(a0) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 1 -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: vl2r.v v26, (a0) -; RV64IV-NEXT: vl2r.v v26, (sp) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 2 -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: lw a0, 8(a0) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 2 -; RV64IV-NEXT: add sp, sp, a0 -; RV64IV-NEXT: addi sp, sp, 16 -; RV64IV-NEXT: ret +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: lw a0, 12(sp) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl2r.v v26, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl2r.v v26, (a0) +; CHECK-NEXT: lw a0, 8(sp) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %local_scalar0 = alloca i32 %local0 = alloca %local1 = alloca @@ -174,41 +174,41 @@ define void @local_var_m2_with_varsize_object(i64 %n) { ; RV64IV-LABEL: local_var_m2_with_varsize_object: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -16 -; RV64IV-NEXT: .cfi_def_cfa_offset 16 -; RV64IV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64IV-NEXT: .cfi_offset ra, -8 -; RV64IV-NEXT: .cfi_offset s0, -16 -; RV64IV-NEXT: addi s0, sp, 16 -; RV64IV-NEXT: .cfi_def_cfa s0, 0 -; RV64IV-NEXT: csrr a1, vlenb -; RV64IV-NEXT: slli a1, a1, 2 -; RV64IV-NEXT: sub sp, sp, a1 -; RV64IV-NEXT: addi a0, a0, 15 -; RV64IV-NEXT: andi a0, a0, -16 -; RV64IV-NEXT: sub a0, sp, a0 -; RV64IV-NEXT: mv sp, a0 -; RV64IV-NEXT: csrr a1, vlenb -; RV64IV-NEXT: slli a1, a1, 1 -; RV64IV-NEXT: sub a1, s0, a1 -; RV64IV-NEXT: addi a1, a1, -16 -; RV64IV-NEXT: call notdead@plt -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 1 -; RV64IV-NEXT: sub a0, s0, a0 -; RV64IV-NEXT: addi a0, a0, -16 -; RV64IV-NEXT: vl2r.v v26, (a0) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 2 -; RV64IV-NEXT: sub a0, s0, a0 -; RV64IV-NEXT: addi a0, a0, -16 -; RV64IV-NEXT: vl2r.v v26, (a0) -; RV64IV-NEXT: addi sp, s0, -16 -; RV64IV-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 16 -; RV64IV-NEXT: ret +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: addi s0, sp, 16 +; CHECK-NEXT: .cfi_def_cfa s0, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: addi a0, a0, 15 +; CHECK-NEXT: andi a0, a0, -16 +; CHECK-NEXT: sub a0, sp, a0 +; CHECK-NEXT: mv sp, a0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub a1, s0, a1 +; CHECK-NEXT: addi a1, a1, -16 +; CHECK-NEXT: call notdead@plt +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub a0, s0, a0 +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: vl2r.v v26, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub a0, s0, a0 +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: vl2r.v v26, (a0) +; CHECK-NEXT: addi sp, s0, -16 +; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %1 = alloca i8, i64 %n %2 = alloca %3 = alloca @@ -221,47 +221,46 @@ define void @local_var_m2_with_bp(i64 %n) { ; RV64IV-LABEL: local_var_m2_with_bp: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -256 -; RV64IV-NEXT: .cfi_def_cfa_offset 256 -; RV64IV-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 240(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s1, 232(sp) # 8-byte Folded Spill -; RV64IV-NEXT: .cfi_offset ra, -8 -; RV64IV-NEXT: .cfi_offset s0, -16 -; RV64IV-NEXT: .cfi_offset s1, -24 -; RV64IV-NEXT: addi s0, sp, 256 -; RV64IV-NEXT: .cfi_def_cfa s0, 0 -; RV64IV-NEXT: andi sp, sp, -128 -; RV64IV-NEXT: mv s1, sp -; RV64IV-NEXT: csrr a1, vlenb -; RV64IV-NEXT: slli a1, a1, 2 -; RV64IV-NEXT: sub sp, sp, a1 -; RV64IV-NEXT: addi a0, a0, 15 -; RV64IV-NEXT: andi a0, a0, -16 -; RV64IV-NEXT: sub a0, sp, a0 -; RV64IV-NEXT: mv sp, a0 -; RV64IV-NEXT: addi a1, s1, 128 -; RV64IV-NEXT: csrr a2, vlenb -; RV64IV-NEXT: slli a2, a2, 1 -; RV64IV-NEXT: sub a2, s1, a2 -; RV64IV-NEXT: mv a2, a2 -; RV64IV-NEXT: call notdead2@plt -; RV64IV-NEXT: lw a0, 124(s1) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 1 -; RV64IV-NEXT: sub a0, s1, a0 -; RV64IV-NEXT: vl2r.v v26, (a0) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 2 -; RV64IV-NEXT: sub a0, s1, a0 -; RV64IV-NEXT: vl2r.v v26, (a0) -; RV64IV-NEXT: lw a0, 120(s1) -; RV64IV-NEXT: addi sp, s0, -256 -; RV64IV-NEXT: ld s1, 232(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 256 -; RV64IV-NEXT: ret +; CHECK-NEXT: addi sp, sp, -256 +; CHECK-NEXT: .cfi_def_cfa_offset 256 +; CHECK-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 232(sp) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, -8 +; CHECK-NEXT: .cfi_offset s0, -16 +; CHECK-NEXT: .cfi_offset s1, -24 +; CHECK-NEXT: addi s0, sp, 256 +; CHECK-NEXT: .cfi_def_cfa s0, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: andi sp, sp, -128 +; CHECK-NEXT: mv s1, sp +; CHECK-NEXT: addi a0, a0, 15 +; CHECK-NEXT: andi a0, a0, -16 +; CHECK-NEXT: sub a0, sp, a0 +; CHECK-NEXT: mv sp, a0 +; CHECK-NEXT: addi a1, s1, 128 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, s1, a2 +; CHECK-NEXT: addi a2, a2, 224 +; CHECK-NEXT: call notdead2@plt +; CHECK-NEXT: lw a0, 124(s1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, s1, a0 +; CHECK-NEXT: addi a0, a0, 224 +; CHECK-NEXT: vl2r.v v26, (a0) +; CHECK-NEXT: addi a0, s1, 224 +; CHECK-NEXT: vl2r.v v26, (a0) +; CHECK-NEXT: lw a0, 120(s1) +; CHECK-NEXT: addi sp, s0, -256 +; CHECK-NEXT: ld s1, 232(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 256 +; CHECK-NEXT: ret %1 = alloca i8, i64 %n %2 = alloca i32, align 128 %local_scalar0 = alloca i32 diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll --- a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll @@ -26,38 +26,42 @@ define @caller() { ; RV64IV-LABEL: caller: ; RV64IV: # %bb.0: -; RV64IV-NEXT: addi sp, sp, -64 +; RV64IV-NEXT: addi sp, sp, -64 ; RV64IV-NEXT: .cfi_def_cfa_offset 64 -; RV64IV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64IV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64IV-NEXT: .cfi_offset ra, -8 ; RV64IV-NEXT: .cfi_offset s0, -16 -; RV64IV-NEXT: addi s0, sp, 64 +; RV64IV-NEXT: addi s0, sp, 64 ; RV64IV-NEXT: .cfi_def_cfa s0, 0 -; RV64IV-NEXT: andi sp, sp, -64 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 5 -; RV64IV-NEXT: sub sp, sp, a0 -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: addi a1, zero, 24 -; RV64IV-NEXT: mul a0, a0, a1 -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: vl8r.v v8, (a0) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 4 -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: vl8r.v v16, (a0) -; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 3 -; RV64IV-NEXT: add a0, sp, a0 -; RV64IV-NEXT: vl8r.v v24, (a0) -; RV64IV-NEXT: mv a0, sp -; RV64IV-NEXT: vs8r.v v24, (sp) -; RV64IV-NEXT: call callee@plt -; RV64IV-NEXT: addi sp, s0, -64 -; RV64IV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64IV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64IV-NEXT: addi sp, sp, 64 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 5 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: andi sp, sp, -64 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: addi a1, zero, 24 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 48 +; RV64IV-NEXT: vl8r.v v8, (a0) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 4 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 48 +; RV64IV-NEXT: vl8r.v v16, (a0) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 3 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 48 +; RV64IV-NEXT: vl8r.v v24, (a0) +; RV64IV-NEXT: addi a0, sp, 48 +; RV64IV-NEXT: addi a1, sp, 48 +; RV64IV-NEXT: vs8r.v v24, (a1) +; RV64IV-NEXT: call callee@plt +; RV64IV-NEXT: addi sp, s0, -64 +; RV64IV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 64 ; RV64IV-NEXT: ret %local0 = alloca %local1 = alloca diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -9,72 +9,66 @@ define @foo( %a, %b, %c, i32 %gvl) nounwind ; SPILL-O0-LABEL: foo: ; SPILL-O0: # %bb.0: -; SPILL-O0-NEXT: addi sp, sp, -16 -; SPILL-O0-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; SPILL-O0-NEXT: csrr a1, vlenb -; SPILL-O0-NEXT: slli a1, a1, 1 -; SPILL-O0-NEXT: sub sp, sp, a1 -; SPILL-O0-NEXT: csrr a1, vlenb -; SPILL-O0-NEXT: slli a1, a1, 1 -; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: sw a0, 8(a1) # 4-byte Folded Spill -; SPILL-O0-NEXT: csrr a1, vlenb -; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu -; SPILL-O0-NEXT: vfadd.vv v25, v8, v9 -; SPILL-O0-NEXT: vs1r.v v25, (sp) # Unknown-size Folded Spill -; SPILL-O0-NEXT: lui a0, %hi(.L.str) -; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) -; SPILL-O0-NEXT: call puts@plt -; SPILL-O0-NEXT: vl1r.v v25, (sp) # Unknown-size Folded Reload -; SPILL-O0-NEXT: csrr a1, vlenb -; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; SPILL-O0-NEXT: # kill: def $x11 killed $x10 -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 -; SPILL-O0-NEXT: add a0, sp, a0 -; SPILL-O0-NEXT: lw a0, 8(a0) # 4-byte Folded Reload -; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu -; SPILL-O0-NEXT: vfadd.vv v8, v8, v25 -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 -; SPILL-O0-NEXT: add sp, sp, a0 -; SPILL-O0-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a1, a1, 1 +; SPILL-O0-NEXT: sub sp, sp, a1 +; SPILL-O0-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; SPILL-O0-NEXT: vfadd.vv v25, v8, v9 +; SPILL-O0-NEXT: vs1r.v v25, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: lui a0, %hi(.L.str) +; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) +; SPILL-O0-NEXT: call puts@plt +; SPILL-O0-NEXT: vl1r.v v25, (sp) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload +; SPILL-O0-NEXT: # kill: def $x11 killed $x10 +; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; SPILL-O0-NEXT: vfadd.vv v8, v8, v25 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: foo: ; SPILL-O2: # %bb.0: -; SPILL-O2-NEXT: addi sp, sp, -16 -; SPILL-O2-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; SPILL-O2-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: slli a1, a1, 1 -; SPILL-O2-NEXT: sub sp, sp, a1 -; SPILL-O2-NEXT: mv s0, a0 -; SPILL-O2-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill -; SPILL-O2-NEXT: vsetvli a0, a0, e64,m1,ta,mu -; SPILL-O2-NEXT: vfadd.vv v25, v8, v9 -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: add a0, sp, a0 -; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill -; SPILL-O2-NEXT: lui a0, %hi(.L.str) -; SPILL-O2-NEXT: addi a0, a0, %lo(.L.str) -; SPILL-O2-NEXT: call puts@plt -; SPILL-O2-NEXT: vsetvli a0, s0, e64,m1,ta,mu -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: add a0, sp, a0 -; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload -; SPILL-O2-NEXT: vl1r.v v26, (sp) # Unknown-size Folded Reload -; SPILL-O2-NEXT: vfadd.vv v8, v26, v25 -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 -; SPILL-O2-NEXT: add sp, sp, a0 -; SPILL-O2-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; SPILL-O2-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; SPILL-O2-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: sub sp, sp, a1 +; SPILL-O2-NEXT: mv s0, a0 +; SPILL-O2-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; SPILL-O2-NEXT: vfadd.vv v25, v8, v9 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: lui a0, %hi(.L.str) +; SPILL-O2-NEXT: addi a0, a0, %lo(.L.str) +; SPILL-O2-NEXT: call puts@plt +; SPILL-O2-NEXT: vsetvli a0, s0, e64,m1,ta,mu +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: vl1r.v v26, (sp) # Unknown-size Folded Reload +; SPILL-O2-NEXT: vfadd.vv v8, v26, v25 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; SPILL-O2-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret { %x = call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i32 %gvl) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll @@ -7,26 +7,34 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_mf2: ; SPILL-O0: # %bb.0: # %entry -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: sub sp, sp, a0 -; SPILL-O0-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP ; SPILL-O0-NEXT: #NO_APP -; SPILL-O0-NEXT: vl1r.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: spill_lmul_mf2: ; SPILL-O2: # %bb.0: # %entry -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: sub sp, sp, a0 -; SPILL-O2-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: vl1r.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret entry: call void asm sideeffect "", @@ -38,26 +46,34 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_1: ; SPILL-O0: # %bb.0: # %entry -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: sub sp, sp, a0 -; SPILL-O0-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP ; SPILL-O0-NEXT: #NO_APP -; SPILL-O0-NEXT: vl1r.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: spill_lmul_1: ; SPILL-O2: # %bb.0: # %entry -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: sub sp, sp, a0 -; SPILL-O2-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: vl1r.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret entry: call void asm sideeffect "", @@ -69,30 +85,38 @@ define @spill_lmul_2( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_2: ; SPILL-O0: # %bb.0: # %entry -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 -; SPILL-O0-NEXT: sub sp, sp, a0 -; SPILL-O0-NEXT: vs2r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP ; SPILL-O0-NEXT: #NO_APP -; SPILL-O0-NEXT: vl2re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 -; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: spill_lmul_2: ; SPILL-O2: # %bb.0: # %entry -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 -; SPILL-O2-NEXT: sub sp, sp, a0 -; SPILL-O2-NEXT: vs2r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: vl2re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 -; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret entry: call void asm sideeffect "", @@ -104,30 +128,38 @@ define @spill_lmul_4( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_4: ; SPILL-O0: # %bb.0: # %entry -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 2 -; SPILL-O0-NEXT: sub sp, sp, a0 -; SPILL-O0-NEXT: vs4r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP ; SPILL-O0-NEXT: #NO_APP -; SPILL-O0-NEXT: vl4re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 2 -; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl4re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: spill_lmul_4: ; SPILL-O2: # %bb.0: # %entry -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 2 -; SPILL-O2-NEXT: sub sp, sp, a0 -; SPILL-O2-NEXT: vs4r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: vl4re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 2 -; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl4re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret entry: call void asm sideeffect "", @@ -139,30 +171,38 @@ define @spill_lmul_8( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_8: ; SPILL-O0: # %bb.0: # %entry -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 3 -; SPILL-O0-NEXT: sub sp, sp, a0 -; SPILL-O0-NEXT: vs8r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP ; SPILL-O0-NEXT: #NO_APP -; SPILL-O0-NEXT: vl8re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 3 -; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: spill_lmul_8: ; SPILL-O2: # %bb.0: # %entry -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 3 -; SPILL-O2-NEXT: sub sp, sp, a0 -; SPILL-O2-NEXT: vs8r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: vl8re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 3 -; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret entry: call void asm sideeffect "", diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll @@ -9,72 +9,74 @@ define @foo( %a, %b, %c, i64 %gvl) nounwind ; SPILL-O0-LABEL: foo: ; SPILL-O0: # %bb.0: -; SPILL-O0-NEXT: addi sp, sp, -16 -; SPILL-O0-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; SPILL-O0-NEXT: csrr a1, vlenb -; SPILL-O0-NEXT: slli a1, a1, 1 -; SPILL-O0-NEXT: sub sp, sp, a1 -; SPILL-O0-NEXT: csrr a1, vlenb -; SPILL-O0-NEXT: slli a1, a1, 1 -; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: sd a0, 0(a1) # 8-byte Folded Spill -; SPILL-O0-NEXT: csrr a1, vlenb -; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu -; SPILL-O0-NEXT: vfadd.vv v25, v8, v9 -; SPILL-O0-NEXT: vs1r.v v25, (sp) # Unknown-size Folded Spill -; SPILL-O0-NEXT: lui a0, %hi(.L.str) -; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) -; SPILL-O0-NEXT: call puts@plt -; SPILL-O0-NEXT: vl1r.v v25, (sp) # Unknown-size Folded Reload -; SPILL-O0-NEXT: csrr a1, vlenb -; SPILL-O0-NEXT: add a1, sp, a1 -; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload -; SPILL-O0-NEXT: # kill: def $x11 killed $x10 -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 -; SPILL-O0-NEXT: add a0, sp, a0 -; SPILL-O0-NEXT: ld a0, 0(a0) # 8-byte Folded Reload -; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu -; SPILL-O0-NEXT: vfadd.vv v8, v8, v25 -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 -; SPILL-O0-NEXT: add sp, sp, a0 -; SPILL-O0-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: addi sp, sp, -32 +; SPILL-O0-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a1, a1, 1 +; SPILL-O0-NEXT: sub sp, sp, a1 +; SPILL-O0-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; SPILL-O0-NEXT: vfadd.vv v25, v8, v9 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: lui a0, %hi(.L.str) +; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) +; SPILL-O0-NEXT: call puts@plt +; SPILL-O0-NEXT: addi a1, sp, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a1) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload +; SPILL-O0-NEXT: # kill: def $x11 killed $x10 +; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; SPILL-O0-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; SPILL-O0-NEXT: vfadd.vv v8, v8, v25 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; SPILL-O0-NEXT: addi sp, sp, 32 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: foo: ; SPILL-O2: # %bb.0: -; SPILL-O2-NEXT: addi sp, sp, -16 -; SPILL-O2-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; SPILL-O2-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: slli a1, a1, 1 -; SPILL-O2-NEXT: sub sp, sp, a1 -; SPILL-O2-NEXT: mv s0, a0 -; SPILL-O2-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill -; SPILL-O2-NEXT: vsetvli a0, a0, e64,m1,ta,mu -; SPILL-O2-NEXT: vfadd.vv v25, v8, v9 -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: add a0, sp, a0 -; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill -; SPILL-O2-NEXT: lui a0, %hi(.L.str) -; SPILL-O2-NEXT: addi a0, a0, %lo(.L.str) -; SPILL-O2-NEXT: call puts@plt -; SPILL-O2-NEXT: vsetvli a0, s0, e64,m1,ta,mu -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: add a0, sp, a0 -; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload -; SPILL-O2-NEXT: vl1r.v v26, (sp) # Unknown-size Folded Reload -; SPILL-O2-NEXT: vfadd.vv v8, v26, v25 -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 -; SPILL-O2-NEXT: add sp, sp, a0 -; SPILL-O2-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; SPILL-O2-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: addi sp, sp, -32 +; SPILL-O2-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; SPILL-O2-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: sub sp, sp, a1 +; SPILL-O2-NEXT: mv s0, a0 +; SPILL-O2-NEXT: addi a1, sp, 16 +; SPILL-O2-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: vsetvli a0, a0, e64,m1,ta,mu +; SPILL-O2-NEXT: vfadd.vv v25, v8, v9 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: lui a0, %hi(.L.str) +; SPILL-O2-NEXT: addi a0, a0, %lo(.L.str) +; SPILL-O2-NEXT: call puts@plt +; SPILL-O2-NEXT: vsetvli a0, s0, e64,m1,ta,mu +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: vfadd.vv v8, v26, v25 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; SPILL-O2-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; SPILL-O2-NEXT: addi sp, sp, 32 ; SPILL-O2-NEXT: ret { %x = call @llvm.riscv.vfadd.nxv1f64.nxv1f64( %a, %b, i64 %gvl) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll @@ -7,26 +7,34 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_1: ; SPILL-O0: # %bb.0: # %entry -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: sub sp, sp, a0 -; SPILL-O0-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP ; SPILL-O0-NEXT: #NO_APP -; SPILL-O0-NEXT: vl1r.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: spill_lmul_1: ; SPILL-O2: # %bb.0: # %entry -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: sub sp, sp, a0 -; SPILL-O2-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: vl1r.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret entry: call void asm sideeffect "", @@ -38,30 +46,38 @@ define @spill_lmul_2( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_2: ; SPILL-O0: # %bb.0: # %entry -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 -; SPILL-O0-NEXT: sub sp, sp, a0 -; SPILL-O0-NEXT: vs2r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP ; SPILL-O0-NEXT: #NO_APP -; SPILL-O0-NEXT: vl2re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 -; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: spill_lmul_2: ; SPILL-O2: # %bb.0: # %entry -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 -; SPILL-O2-NEXT: sub sp, sp, a0 -; SPILL-O2-NEXT: vs2r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: vl2re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 -; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret entry: call void asm sideeffect "", @@ -73,30 +89,38 @@ define @spill_lmul_4( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_4: ; SPILL-O0: # %bb.0: # %entry -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 2 -; SPILL-O0-NEXT: sub sp, sp, a0 -; SPILL-O0-NEXT: vs4r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP ; SPILL-O0-NEXT: #NO_APP -; SPILL-O0-NEXT: vl4re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 2 -; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl4re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: spill_lmul_4: ; SPILL-O2: # %bb.0: # %entry -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 2 -; SPILL-O2-NEXT: sub sp, sp, a0 -; SPILL-O2-NEXT: vs4r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: vl4re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 2 -; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl4re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret entry: call void asm sideeffect "", @@ -108,30 +132,38 @@ define @spill_lmul_8( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_8: ; SPILL-O0: # %bb.0: # %entry -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 3 -; SPILL-O0-NEXT: sub sp, sp, a0 -; SPILL-O0-NEXT: vs8r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP ; SPILL-O0-NEXT: #NO_APP -; SPILL-O0-NEXT: vl8re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 3 -; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret ; ; SPILL-O2-LABEL: spill_lmul_8: ; SPILL-O2: # %bb.0: # %entry -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 3 -; SPILL-O2-NEXT: sub sp, sp, a0 -; SPILL-O2-NEXT: vs8r.v v8, (sp) # Unknown-size Folded Spill +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP -; SPILL-O2-NEXT: vl8re8.v v8, (sp) # Unknown-size Folded Reload -; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 3 -; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret entry: call void asm sideeffect "",