diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -62,6 +62,8 @@ bool expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); bool expandVMSET_VMCLR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Opcode); + bool expandVSPILL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); + bool expandVRELOAD(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); }; char RISCVExpandPseudo::ID = 0; @@ -123,6 +125,30 @@ case RISCV::PseudoVMSET_M_B64: // vmset.m vd => vmxnor.mm vd, vd, vd return expandVMSET_VMCLR(MBB, MBBI, RISCV::VMXNOR_MM); + case RISCV::PseudoVSPILL2_M1: + case RISCV::PseudoVSPILL2_M2: + case RISCV::PseudoVSPILL2_M4: + case RISCV::PseudoVSPILL3_M1: + case RISCV::PseudoVSPILL3_M2: + case RISCV::PseudoVSPILL4_M1: + case RISCV::PseudoVSPILL4_M2: + case RISCV::PseudoVSPILL5_M1: + case RISCV::PseudoVSPILL6_M1: + case RISCV::PseudoVSPILL7_M1: + case RISCV::PseudoVSPILL8_M1: + return expandVSPILL(MBB, MBBI); + case RISCV::PseudoVRELOAD2_M1: + case RISCV::PseudoVRELOAD2_M2: + case RISCV::PseudoVRELOAD2_M4: + case RISCV::PseudoVRELOAD3_M1: + case RISCV::PseudoVRELOAD3_M2: + case RISCV::PseudoVRELOAD4_M1: + case RISCV::PseudoVRELOAD4_M2: + case RISCV::PseudoVRELOAD5_M1: + case RISCV::PseudoVRELOAD6_M1: + case RISCV::PseudoVRELOAD7_M1: + case RISCV::PseudoVRELOAD8_M1: + return expandVRELOAD(MBB, MBBI); } return false; @@ -253,6 +279,72 @@ return true; } +bool RISCVExpandPseudo::expandVSPILL(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + const TargetRegisterInfo *TRI = + MBB.getParent()->getSubtarget().getRegisterInfo(); + DebugLoc DL = MBBI->getDebugLoc(); + Register SrcReg = MBBI->getOperand(0).getReg(); + Register Base = MBBI->getOperand(1).getReg(); + Register VL = MBBI->getOperand(2).getReg(); + unsigned NF = MBBI->getOperand(3).getImm(); + unsigned LMUL = MBBI->getOperand(4).getImm(); + unsigned Opcode = RISCV::VS1R_V; + unsigned SubRegIdx = RISCV::sub_vrm1_0; + if (LMUL == 2) { + Opcode = RISCV::VS2R_V; + SubRegIdx = RISCV::sub_vrm2_0; + } else if (LMUL == 4) { + Opcode = RISCV::VS4R_V; + SubRegIdx = RISCV::sub_vrm4_0; + } + + for (unsigned I = 0; I < NF; ++I) { + BuildMI(MBB, MBBI, DL, TII->get(Opcode)) + .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I)) + .addReg(Base); + if (I != NF - 1) + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADD), Base) + .addReg(Base) + .addReg(VL); + } + MBBI->eraseFromParent(); + return true; +} + +bool RISCVExpandPseudo::expandVRELOAD(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + const TargetRegisterInfo *TRI = + MBB.getParent()->getSubtarget().getRegisterInfo(); + DebugLoc DL = MBBI->getDebugLoc(); + Register DestReg = MBBI->getOperand(0).getReg(); + Register Base = MBBI->getOperand(1).getReg(); + Register VL = MBBI->getOperand(2).getReg(); + unsigned NF = MBBI->getOperand(3).getImm(); + unsigned LMUL = MBBI->getOperand(4).getImm(); + unsigned Opcode = RISCV::VL1RE8_V; + unsigned SubRegIdx = RISCV::sub_vrm1_0; + if (LMUL == 2) { + Opcode = RISCV::VL2RE8_V; + SubRegIdx = RISCV::sub_vrm2_0; + } else if (LMUL == 4) { + Opcode = RISCV::VL4RE8_V; + SubRegIdx = RISCV::sub_vrm4_0; + } + + for (unsigned I = 0; I < NF; ++I) { + BuildMI(MBB, MBBI, DL, TII->get(Opcode), + TRI->getSubReg(DestReg, SubRegIdx + I)) + .addReg(Base); + if (I != NF - 1) + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADD), Base) + .addReg(Base) + 
.addReg(VL); + } + MBBI->eraseFromParent(); + return true; +} + } // end of anonymous namespace INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo", diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -157,28 +157,55 @@ MachineFrameInfo &MFI = MF->getFrameInfo(); unsigned Opcode; - bool IsScalableVector = false; - if (RISCV::GPRRegClass.hasSubClassEq(RC)) + bool IsScalableVector = true; + bool IsZvlsseg = true; + if (RISCV::GPRRegClass.hasSubClassEq(RC)) { Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW : RISCV::SD; - else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FSH; - else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FSW; - else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FSD; - else if (RISCV::VRRegClass.hasSubClassEq(RC)) { + IsScalableVector = false; + } else if (RISCV::VRRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVSPILL_M1; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVSPILL_M2; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVSPILL_M4; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVSPILL_M8; - IsScalableVector = true; + IsZvlsseg = false; + } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVSPILL2_M1; + } else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVSPILL2_M2; + } else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVSPILL2_M4; + } else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVSPILL3_M1; + } else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVSPILL3_M2; + } else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVSPILL4_M1; + } else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVSPILL4_M2; + } else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVSPILL5_M1; + } else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVSPILL6_M1; + } else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVSPILL7_M1; + } else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVSPILL8_M1; } else llvm_unreachable("Can't store this register to stack slot"); @@ -188,10 +215,16 @@ MemoryLocation::UnknownSize, MFI.getObjectAlign(FI)); MFI.setStackID(FI, TargetStackID::ScalableVector); - BuildMI(MBB, I, DL, get(Opcode)) - .addReg(SrcReg, getKillRegState(IsKill)) - .addFrameIndex(FI) - .addMemOperand(MMO); + auto MIB = BuildMI(MBB, I, DL, get(Opcode)) + .addReg(SrcReg, getKillRegState(IsKill)) + .addFrameIndex(FI) + .addMemOperand(MMO); + if (IsZvlsseg) { + // For spilling/reloading Zvlsseg registers, append dummy fields for + // the vector length, the number of fields and LMUL. These three arguments + // will be used when expanding these pseudo instructions. 
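+    // The placeholder operands added below (X0 and two zero immediates) are
+    // rewritten with the byte stride between consecutive fields, the real NF
+    // and the real LMUL by RISCVRegisterInfo::eliminateFrameIndex.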
+ MIB.addReg(RISCV::X0).addImm(0).addImm(0); + } } else { MachineMemOperand *MMO = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, @@ -218,28 +251,55 @@ MachineFrameInfo &MFI = MF->getFrameInfo(); unsigned Opcode; - bool IsScalableVector = false; - if (RISCV::GPRRegClass.hasSubClassEq(RC)) + bool IsScalableVector = true; + bool IsZvlsseg = true; + if (RISCV::GPRRegClass.hasSubClassEq(RC)) { Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::LW : RISCV::LD; - else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FLH; - else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FLW; - else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) + IsScalableVector = false; + } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) { Opcode = RISCV::FLD; - else if (RISCV::VRRegClass.hasSubClassEq(RC)) { + IsScalableVector = false; + } else if (RISCV::VRRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVRELOAD_M1; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVRELOAD_M2; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVRELOAD_M4; - IsScalableVector = true; + IsZvlsseg = false; } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoVRELOAD_M8; - IsScalableVector = true; + IsZvlsseg = false; + } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVRELOAD2_M1; + } else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVRELOAD2_M2; + } else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVRELOAD2_M4; + } else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVRELOAD3_M1; + } else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVRELOAD3_M2; + } else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVRELOAD4_M1; + } else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVRELOAD4_M2; + } else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVRELOAD5_M1; + } else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVRELOAD6_M1; + } else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVRELOAD7_M1; + } else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC)) { + Opcode = RISCV::PseudoVRELOAD8_M1; } else llvm_unreachable("Can't load this register from stack slot"); @@ -249,9 +309,15 @@ MemoryLocation::UnknownSize, MFI.getObjectAlign(FI)); MFI.setStackID(FI, TargetStackID::ScalableVector); - BuildMI(MBB, I, DL, get(Opcode), DstReg) - .addFrameIndex(FI) - .addMemOperand(MMO); + auto MIB = BuildMI(MBB, I, DL, get(Opcode), DstReg) + .addFrameIndex(FI) + .addMemOperand(MMO); + if (IsZvlsseg) { + // For spilling/reloading Zvlsseg registers, append dummy fields for + // the vector length, the number of fields and LMUL. These three arguments + // will be used when expanding these pseudo instructions. 
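+    // As in storeRegToStackSlot, RISCVRegisterInfo::eliminateFrameIndex later
+    // rewrites these placeholder operands with the real stride, NF and LMUL.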
+ MIB.addReg(RISCV::X0).addImm(0).addImm(0); + } } else { MachineMemOperand *MMO = MF->getMachineMemOperand( MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -3171,6 +3171,20 @@ def PseudoVRELOAD_M8 : VPseudo; } +foreach lmul = MxList.m in { + foreach nf = NFSet.L in { + defvar vreg = SegRegClass.RC; + let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1 in { + def "PseudoVSPILL" # nf # "_" # lmul.MX : + Pseudo<(outs), (ins vreg:$rs1, GPR:$rs2, GPR:$vlenb, uimm5:$nf, uimm5:$lmul), []>; + } + let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in { + def "PseudoVRELOAD" # nf # "_" # lmul.MX : + Pseudo<(outs vreg:$rs1), (ins GPR:$rs2, GPR:$vlenb, uimm5:$nf, uimm5:$lmul), []>; + } + } +} + //===----------------------------------------------------------------------===// // 6. Configuration-Setting Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -179,6 +179,77 @@ } } +static bool isRVVSpillForZvlsseg(unsigned Opcode, unsigned *NF, + unsigned *LMUL) { + unsigned N; + unsigned L; + switch (Opcode) { + default: + return false; + case RISCV::PseudoVSPILL2_M1: + case RISCV::PseudoVRELOAD2_M1: + N = 2; + L = 1; + break; + case RISCV::PseudoVSPILL2_M2: + case RISCV::PseudoVRELOAD2_M2: + N = 2; + L = 2; + break; + case RISCV::PseudoVSPILL2_M4: + case RISCV::PseudoVRELOAD2_M4: + N = 2; + L = 4; + break; + case RISCV::PseudoVSPILL3_M1: + case RISCV::PseudoVRELOAD3_M1: + N = 3; + L = 1; + break; + case RISCV::PseudoVSPILL3_M2: + case RISCV::PseudoVRELOAD3_M2: + N = 3; + L = 2; + break; + case RISCV::PseudoVSPILL4_M1: + case RISCV::PseudoVRELOAD4_M1: + N = 4; + L = 1; + break; + case RISCV::PseudoVSPILL4_M2: + case RISCV::PseudoVRELOAD4_M2: + N = 4; + L = 2; + break; + case RISCV::PseudoVSPILL5_M1: + case RISCV::PseudoVRELOAD5_M1: + N = 5; + L = 1; + break; + case RISCV::PseudoVSPILL6_M1: + case RISCV::PseudoVRELOAD6_M1: + N = 6; + L = 1; + break; + case RISCV::PseudoVSPILL7_M1: + case RISCV::PseudoVRELOAD7_M1: + N = 7; + L = 1; + break; + case RISCV::PseudoVSPILL8_M1: + case RISCV::PseudoVRELOAD8_M1: + N = 8; + L = 1; + break; + } + + if (NF) + *NF = N; + if (LMUL) + *LMUL = L; + return true; +} + void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { @@ -195,7 +266,8 @@ StackOffset Offset = getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg); bool isRVV = RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()) || - isRVVWholeLoadStore(MI.getOpcode()); + isRVVWholeLoadStore(MI.getOpcode()) || + isRVVSpillForZvlsseg(MI.getOpcode(), nullptr, nullptr); if (!isRVV) Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); @@ -268,6 +340,18 @@ if (!isRVV) MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); } + + MachineFrameInfo &MFI = MF.getFrameInfo(); + unsigned NF; + unsigned LMUL; + if (isRVVSpillForZvlsseg(MI.getOpcode(), &NF, &LMUL)) { + int64_t ScalableValue = MFI.getObjectSize(FrameIndex) / NF; + Register FactorRegister = + TII->getVLENFactoredAmount(MF, 
MBB, II, ScalableValue); + MI.getOperand(FIOperandNum + 1).ChangeToRegister(FactorRegister, false); + MI.getOperand(FIOperandNum + 2).ChangeToImmediate(NF); + MI.getOperand(FIOperandNum + 3).ChangeToImmediate(LMUL); + } } Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -0,0 +1,299 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+m -O0 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O0 %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+m -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2 %s + +define @spill_zvlsseg_nxv1i32(i32* %base, i32 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv1r.v v25, v1 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vs1r.v v0, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs1r.v v1, (a0) +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vl1r.v v7, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl1r.v v8, (a0) +; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv1i32(i32* %base, i32 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv2i32(i32* %base, i32 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv1r.v v25, v1 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; 
SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vs1r.v v0, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs1r.v v1, (a0) +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vl1r.v v7, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl1r.v v8, (a0) +; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i32(i32* %base, i32 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv4i32(i32* %base, i32 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv2r.v v26, v2 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 2 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vs2r.v v0, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v2, (a0) +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vl2r.v v6, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v v8, (a0) +; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv4i32(i32* %base, i32 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv8i32(i32* %base, i32 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O0: # %bb.0: # %entry 
+; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv4r.v v28, v4 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs4r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl4re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 2 +; SPILL-O2-NEXT: vs4r.v v0, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs4r.v v4, (a0) +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 2 +; SPILL-O2-NEXT: vl4r.v v4, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl4r.v v8, (a0) +; SPILL-O2-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i32(i32* %base, i32 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg3_nxv4i32(i32* %base, i32 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg3_nxv4i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O0-NEXT: vlseg3e32.v v0, (a0) +; SPILL-O0-NEXT: vmv2r.v v26, v2 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg3_nxv4i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: addi a3, zero, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O2-NEXT: vlseg3e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vs2r.v v0, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v2, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v4, (a0) +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: 
vl2r.v v6, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v v8, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v v10, (a0) +; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2_v10m2 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: addi a1, zero, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4i32(i32* %base, i32 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,,} %0, 1 + ret %1 +} + +declare {,} @llvm.riscv.vlseg2.nxv1i32(i32* , i32) +declare {,} @llvm.riscv.vlseg2.nxv2i32(i32* , i32) +declare {,} @llvm.riscv.vlseg2.nxv4i32(i32* , i32) +declare {,} @llvm.riscv.vlseg2.nxv8i32(i32* , i32) +declare {,,} @llvm.riscv.vlseg3.nxv4i32(i32* , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll @@ -0,0 +1,299 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+m -O0 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O0 %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+m -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2 %s + +define @spill_zvlsseg_nxv1i32(i32* %base, i64 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv1r.v v25, v1 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,mf2,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vs1r.v v0, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs1r.v v1, (a0) +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vl1r.v v7, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl1r.v v8, (a0) +; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv1i32(i32* %base, i64 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = 
extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv2i32(i32* %base, i64 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv1r.v v25, v1 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vs1r.v v0, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs1r.v v1, (a0) +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: vl1r.v v7, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl1r.v v8, (a0) +; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i32(i32* %base, i64 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv4i32(i32* %base, i64 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv2r.v v26, v2 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 2 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vs2r.v v0, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v2, (a0) +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vl2r.v v6, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v 
v8, (a0) +; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv4i32(i32* %base, i64 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg_nxv8i32(i32* %base, i64 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; SPILL-O0-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O0-NEXT: vmv4r.v v28, v4 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs4r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl4re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m4,ta,mu +; SPILL-O2-NEXT: vlseg2e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 2 +; SPILL-O2-NEXT: vs4r.v v0, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs4r.v v4, (a0) +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 2 +; SPILL-O2-NEXT: vl4r.v v4, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl4r.v v8, (a0) +; SPILL-O2-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i32(i32* %base, i64 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,} %0, 1 + ret %1 +} + +define @spill_zvlsseg3_nxv4i32(i32* %base, i64 %vl) nounwind { +; SPILL-O0-LABEL: spill_zvlsseg3_nxv4i32: +; SPILL-O0: # %bb.0: # %entry +; SPILL-O0-NEXT: addi sp, sp, -16 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O0-NEXT: vlseg3e32.v v0, (a0) +; SPILL-O0-NEXT: vmv2r.v v26, v2 +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: #APP +; SPILL-O0-NEXT: #NO_APP +; SPILL-O0-NEXT: addi a0, sp, 16 +; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add sp, sp, a0 +; SPILL-O0-NEXT: addi sp, sp, 16 +; SPILL-O0-NEXT: ret +; +; SPILL-O2-LABEL: 
spill_zvlsseg3_nxv4i32: +; SPILL-O2: # %bb.0: # %entry +; SPILL-O2-NEXT: addi sp, sp, -16 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: addi a3, zero, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu +; SPILL-O2-NEXT: vlseg3e32.v v0, (a0) +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vs2r.v v0, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v2, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vs2r.v v4, (a0) +; SPILL-O2-NEXT: #APP +; SPILL-O2-NEXT: #NO_APP +; SPILL-O2-NEXT: addi a0, sp, 16 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: vl2r.v v6, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v v8, (a0) +; SPILL-O2-NEXT: add a0, a0, a1 +; SPILL-O2-NEXT: vl2r.v v10, (a0) +; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2_v10m2 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: addi a1, zero, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add sp, sp, a0 +; SPILL-O2-NEXT: addi sp, sp, 16 +; SPILL-O2-NEXT: ret +entry: + %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4i32(i32* %base, i64 %vl) + call void asm sideeffect "", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() + %1 = extractvalue {,,} %0, 1 + ret %1 +} + +declare {,} @llvm.riscv.vlseg2.nxv1i32(i32* , i64) +declare {,} @llvm.riscv.vlseg2.nxv2i32(i32* , i64) +declare {,} @llvm.riscv.vlseg2.nxv4i32(i32* , i64) +declare {,} @llvm.riscv.vlseg2.nxv8i32(i32* , i64) +declare {,,} @llvm.riscv.vlseg3.nxv4i32(i32* , i64)
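
For reference, a sketch of the net effect of the new pseudos on a two-field LMUL=1 tuple, as exercised by the SPILL-O2 checks above (register assignments follow those checks: a0 holds the slot address, a1 the per-field stride produced by getVLENFactoredAmount):

  csrr   a1, vlenb        # per-field stride, emitted by eliminateFrameIndex
  vs1r.v v0, (a0)         # PseudoVSPILL2_M1: store field 0
  add    a0, a0, a1
  vs1r.v v1, (a0)         # store field 1
  ...
  vl1r.v v7, (a0)         # PseudoVRELOAD2_M1: reload field 0
  add    a0, a0, a1
  vl1r.v v8, (a0)         # reload field 1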