diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
--- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -348,6 +348,7 @@
     IO.enumCase(ID, "default", TargetStackID::Default);
     IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill);
     IO.enumCase(ID, "sve-vec", TargetStackID::SVEVector);
+    IO.enumCase(ID, "riscv-vec", TargetStackID::RISCVVector);
     IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc);
   }
 };
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/Register.h"
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
 #include <cassert>
 #include <vector>
@@ -221,6 +222,10 @@
   /// sized objects have been allocated yet.
   bool HasVarSizedObjects = false;

+  /// This boolean keeps track of whether any scalable
+  /// vector objects have been allocated yet.
+  bool HasScalableVectorObjects = false;
+
   /// This boolean keeps track of whether there is a call
   /// to builtin \@llvm.frameaddress.
   bool FrameAddressTaken = false;
@@ -350,6 +355,11 @@
   /// contains any variable sized objects.
   bool hasVarSizedObjects() const { return HasVarSizedObjects; }

+  /// This method may be called any time after instruction
+  /// selection is complete to determine if the stack frame for this function
+  /// contains any scalable vector objects.
+  bool hasScalableVectorObjects() const { return HasScalableVectorObjects; }
+
   /// Return the index for the stack protector object.
   int getStackProtectorIndex() const { return StackProtectorIdx; }
   void setStackProtectorIndex(int I) { StackProtectorIdx = I; }
@@ -726,6 +736,8 @@
     assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
            "Invalid Object Idx!");
     Objects[ObjectIdx+NumFixedObjects].StackID = ID;
+    if (ID == TargetStackID::SVEVector || ID == TargetStackID::RISCVVector)
+      HasScalableVectorObjects = true;
     // If ID > 0, MaxAlignment may now be overly conservative.
     // If ID == 0, MaxAlignment will need to be updated separately.
   }
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -28,6 +28,7 @@
   Default = 0,
   SGPRSpill = 1,
   SVEVector = 2,
+  RISCVVector = 3,
   NoAlloc = 255
 };
 }
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -60,6 +60,9 @@
   assert(Index >= 0 && "Bad frame index!");
   if (StackID == 0)
     ensureMaxAlignment(Alignment);
+  else if (StackID == TargetStackID::SVEVector ||
+           StackID == TargetStackID::RISCVVector)
+    HasScalableVectorObjects = true;
   return Index;
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -167,17 +167,19 @@
       FrameIndex = MF->getFrameInfo().CreateFixedObject(
           TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true);
       MF->getFrameInfo().setObjectAlignment(FrameIndex, Alignment);
+      // Scalable vectors may need a special StackID to distinguish
+      // them from other (fixed size) stack objects.
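+      // (The StackID is provided by TFI->getStackIDForScalableVectors();
+      // targets that do not use a dedicated ID for scalable vectors simply
+      // return TargetStackID::Default.)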
+      if (isa<ScalableVectorType>(Ty))
+        MF->getFrameInfo().setStackID(
+            FrameIndex, TFI->getStackIDForScalableVectors());
     } else {
-      FrameIndex = MF->getFrameInfo().CreateStackObject(TySize, Alignment,
-                                                        false, AI);
+      FrameIndex = MF->getFrameInfo().CreateStackObject(
+          TySize, Alignment, false, AI,
+          isa<ScalableVectorType>(Ty)
+              ? TFI->getStackIDForScalableVectors()
+              : TargetStackID::Default);
     }
-    // Scalable vectors may need a special StackID to distinguish
-    // them from other (fixed size) stack objects.
-    if (isa<ScalableVectorType>(Ty))
-      MF->getFrameInfo().setStackID(FrameIndex,
-                                    TFI->getStackIDForScalableVectors());
-
     StaticAllocaMap[AI] = FrameIndex;

     // Update the catch handler information.
     if (Iter != CatchObjects.end()) {
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -788,6 +788,7 @@
   case TargetStackID::SGPRSpill:
     return true;
   case TargetStackID::SVEVector:
+  case TargetStackID::RISCVVector:
     return false;
   }
   llvm_unreachable("Invalid TargetStackID::Value");
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -65,6 +65,9 @@
   bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
   bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;

+  bool isSupportedStackID(TargetStackID::Value ID) const override;
+  TargetStackID::Value getStackIDForScalableVectors() const override;
+
   /// targetHandlesStackFrameRounding - Returns true if the target is
   /// responsible for rounding up the stack frame (probably at emitPrologue
   /// time).
@@ -78,6 +81,12 @@
   void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  const DebugLoc &DL, Register DestReg, Register SrcReg,
                  int64_t Val, MachineInstr::MIFlag Flag) const;
+  void prepareStorageSpilledVR(MachineFunction &MF, MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MBBI,
+                               const MachineFrameInfo &MFI,
+                               MachineRegisterInfo &MRI,
+                               const TargetInstrInfo &TII,
+                               const DebugLoc &DL) const;
 };
 }
 #endif
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -222,7 +222,7 @@
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
          RegInfo->needsStackRealignment(MF) || MFI.hasVarSizedObjects() ||
-         MFI.isFrameAddressTaken();
+         MFI.isFrameAddressTaken() || MFI.hasScalableVectorObjects();
 }

 bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
@@ -240,6 +240,42 @@
   // Get the number of bytes to allocate from the FrameInfo.
   uint64_t FrameSize = MFI.getStackSize();

+  // Account for all RVV frame objects, each taking the size of a pointer.
+  // In the current implementation, we reserve stack space for the RVV
+  // objects and store their base pointers into the frame, so we need to
+  // reserve frame space for these pointers.
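+  // For example, on RV64 every live RVV object adds one 8-byte (GPR spill
+  // sized) slot to FrameSize below; the vector data itself is allocated
+  // later, in prepareStorageSpilledVR, at the end of the prologue.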
+  //
+  // |---------| <-- frame pointer
+  // |         |
+  // |         |
+  // |---------|
+  // | new sp  |
+  // |---------| <-- sp
+  // |  RVV    | size = VLENB x LMUL
+  // | objects |
+  // |---------| <-- new sp = sp - size
+  for (int FI = MFI.getObjectIndexBegin(), EFI = MFI.getObjectIndexEnd();
+       FI < EFI; FI++) {
+    uint8_t StackID = MFI.getStackID(FI);
+    if (StackID == TargetStackID::Default)
+      continue;
+    if (MFI.isDeadObjectIndex(FI))
+      continue;
+
+    switch (StackID) {
+    case TargetStackID::RISCVVector: {
+      const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+      FrameSize =
+          alignTo(FrameSize, RegInfo->getSpillAlignment(RISCV::GPRRegClass));
+      FrameSize += RegInfo->getSpillSize(RISCV::GPRRegClass);
+      MFI.setObjectOffset(FI, -FrameSize);
+      break;
+    }
+    default:
+      llvm_unreachable("Unexpected StackID");
+    }
+  }
+
   // targetHandlesStackFrameRounding() will return true and handle the alignment
   // here.
   // MaxCallFrameSize is defined through ADJCALLSTACKDOWN.
@@ -316,6 +352,19 @@
   return NonLibcallCSI;
 }

+bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
+  switch (ID) {
+  case TargetStackID::Default:
+  case TargetStackID::RISCVVector:
+    return true;
+  case TargetStackID::NoAlloc:
+  case TargetStackID::SGPRSpill:
+  case TargetStackID::SVEVector:
+    return false;
+  }
+  llvm_unreachable("Invalid TargetStackID::Value");
+}
+
 void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -402,7 +451,8 @@
   unsigned CFIIndex = MF.addFrameInst(
       MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
   BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-      .addCFIIndex(CFIIndex);
+      .addCFIIndex(CFIIndex)
+      .setMIFlag(MachineInstr::FrameSetup);

   const auto &CSI = MFI.getCalleeSavedInfo();
@@ -412,7 +462,14 @@
   // to the stack, not before.
   // FIXME: assumes exactly one instruction is used to save each callee-saved
   // register.
-  std::advance(MBBI, getNonLibcallCSI(CSI).size());
+  // RVV registers break this assumption but they are to be handled after we
+  // adjust the FP.
+  int InsnToSkip = getNonLibcallCSI(CSI).size();
+  for (auto &CS : CSI) {
+    if (RISCV::VRRegClass.contains(CS.getReg()))
+      InsnToSkip--;
+  }
+  std::advance(MBBI, InsnToSkip);

   // Iterate over list of callee-saved registers and emit .cfi_offset
   // directives.
@@ -430,7 +487,8 @@
     unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
         nullptr, RI->getDwarfRegNum(Reg, true), Offset));
     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex);
+        .addCFIIndex(CFIIndex)
+        .setMIFlag(MachineInstr::FrameSetup);
   }

   // Generate new FP.
@@ -447,7 +505,8 @@
     unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
         nullptr, RI->getDwarfRegNum(FPReg, true), RVFI->getVarArgsSaveSize()));
     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex);
+        .addCFIIndex(CFIIndex)
+        .setMIFlag(MachineInstr::FrameSetup);
   }

   // Emit the second SP adjustment after saving callee saved registers.
@@ -465,7 +524,8 @@
       unsigned CFIIndex = MF.addFrameInst(
           MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex);
+          .addCFIIndex(CFIIndex)
+          .setMIFlag(MachineInstr::FrameSetup);
     }
   }
@@ -502,6 +562,91 @@
       }
     }
   }
+
+  prepareStorageSpilledVR(MF, MBB, MBBI, MFI, MF.getRegInfo(), *TII, DL);
+}
+
+void RISCVFrameLowering::prepareStorageSpilledVR(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator MBBI, const MachineFrameInfo &MFI,
+    MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
+    const DebugLoc &DL) const {
+  if (!MFI.hasScalableVectorObjects())
+    return;
+
+  unsigned SizeOfVector = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+  BuildMI(MBB, MBBI, DL, TII.get(RISCV::PseudoReadVLENB), SizeOfVector);
+
+  // Classify the frame indices (FIs) into bins by LMUL.
+  std::array<SmallVector<int, 8>, 4> BinSizes;
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  // Collect all RVV objects first.
+  for (int FI = MFI.getObjectIndexBegin(), EFI = MFI.getObjectIndexEnd();
+       FI < EFI; FI++) {
+    int8_t StackID = MFI.getStackID(FI);
+    if (StackID == TargetStackID::Default)
+      continue;
+    if (MFI.isDeadObjectIndex(FI))
+      continue;
+    assert(StackID == TargetStackID::RISCVVector && "Unexpected StackID");
+
+    int64_t ObjectSize = MFI.getObjectSize(FI);
+
+    unsigned ShiftAmount;
+    // Mask objects may be logically smaller than the spill size of the VR
+    // class; reserve the size of a full VR for them. The same applies to
+    // fractional LMUL objects.
+    if (ObjectSize <= TRI->getSpillSize(RISCV::VRRegClass))
+      ShiftAmount = 0;
+    else if (ObjectSize == TRI->getSpillSize(RISCV::VRM2RegClass))
+      ShiftAmount = 1;
+    else if (ObjectSize == TRI->getSpillSize(RISCV::VRM4RegClass))
+      ShiftAmount = 2;
+    else if (ObjectSize == TRI->getSpillSize(RISCV::VRM8RegClass))
+      ShiftAmount = 3;
+    else
+      llvm_unreachable("Unexpected object size");
+
+    BinSizes[ShiftAmount].push_back(FI);
+  }
+
+  // Do the actual allocation. We allocate space for LMUL = 1 objects first,
+  // then for LMUL 2, 4, and 8.
+  Register SPReg = getSPReg(STI);
+  for (const auto &Bin : BinSizes) {
+    if (Bin.empty())
+      continue;
+
+    Register FactorRegister = RISCV::NoRegister;
+    int ShiftAmount = &Bin - BinSizes.data();
+    // The maximum LMUL is 8.
+    assert(0 <= ShiftAmount && ShiftAmount <= 3 && "Invalid shift amount!");
+    if (ShiftAmount > 0) {
+      // FactorRegister = VLENB x LMUL.
+      FactorRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+      BuildMI(MBB, MBBI, DL, TII.get(RISCV::SLLI), FactorRegister)
+          .addReg(SizeOfVector)
+          .addImm(ShiftAmount);
+    }
+
+    for (const int &FI : Bin) {
+      if (ShiftAmount > 0) {
+        assert(FactorRegister && "Invalid register!");
+        bool LastUse = &FI == &Bin.back();
+        // SP = SP - VLENB x LMUL.
+        BuildMI(MBB, MBBI, DL, TII.get(RISCV::SUB), SPReg)
+            .addReg(SPReg)
+            .addReg(FactorRegister, LastUse ? RegState::Kill : 0);
+      } else
+        // SP = SP - VLENB.
+        BuildMI(MBB, MBBI, DL, TII.get(RISCV::SUB), SPReg)
+            .addReg(SPReg)
+            .addReg(SizeOfVector);
+
+      TII.storeRegToStackSlot(MBB, MBBI, SPReg, false, FI,
+                              TRI->getMinimalPhysRegClass(SPReg), TRI);
+    }
+  }
 }

 void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -545,8 +690,15 @@
   // FIXME: assumes exactly one instruction is used to restore each
   // callee-saved register.
   auto LastFrameDestroy = MBBI;
-  if (!CSI.empty())
-    LastFrameDestroy = std::prev(MBBI, CSI.size());
+  if (!CSI.empty()) {
+    // Ignore the VRs as we did in the prologue.
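+    // (VR callee-saved registers break the one-instruction-per-register
+    // assumption noted in the FIXME above, so they are excluded from the
+    // number of instructions to skip.)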
+    int InsnToSkip = CSI.size();
+    for (auto &CS : CSI) {
+      if (RISCV::VRRegClass.contains(CS.getReg()))
+        InsnToSkip--;
+    }
+    LastFrameDestroy = std::prev(MBBI, InsnToSkip);
+  }

   uint64_t StackSize = MFI.getStackSize();
   uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
@@ -555,7 +707,8 @@
   // Restore the stack pointer using the value of the frame pointer. Only
   // necessary if the stack pointer was modified, meaning the stack size is
   // unknown.
-  if (RI->needsStackRealignment(MF) || MFI.hasVarSizedObjects()) {
+  if (RI->needsStackRealignment(MF) || MFI.hasVarSizedObjects() ||
+      MFI.hasScalableVectorObjects()) {
     assert(hasFP(MF) && "frame pointer should not have been eliminated");
     adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg, -FPOffset,
               MachineInstr::FrameDestroy);
@@ -605,14 +758,19 @@
     MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
   }

-  if (FI >= MinCSFI && FI <= MaxCSFI) {
+  if ((FI >= MinCSFI && FI <= MaxCSFI) &&
+      (MFI.getStackID(FI) == TargetStackID::Default)) {
     FrameReg = RISCV::X2;

     if (FirstSPAdjustAmount)
       Offset += FirstSPAdjustAmount;
     else
       Offset += MFI.getStackSize();
-  } else if (RI->needsStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
+  } else if (RI->needsStackRealignment(MF) && !MFI.isFixedObjectIndex(FI) &&
+             !MFI.hasScalableVectorObjects()) {
+    // If there are RVV objects on the stack, SP is adjusted again at the end
+    // of the prologue, so we cannot use SP to access the frame objects.
+    //
     // If the stack was realigned, the frame pointer is set in order to allow
     // SP to be restored, so we need another base register to record the stack
     // after realignment.
@@ -705,7 +863,8 @@
 // function contains variable size objects and let eliminateCallFramePseudoInstr
 // preserve stack space for it.
 bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
-  return !MF.getFrameInfo().hasVarSizedObjects();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  return !(MFI.hasVarSizedObjects() || MFI.hasScalableVectorObjects());
 }

 // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
@@ -896,3 +1055,7 @@
   // replacing the successor with our own tail return at the end of our block.
   return SuccMBB->isReturnBlock() && SuccMBB->size() == 1;
 }
+
+TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
+  return TargetStackID::RISCVVector;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2384,6 +2384,10 @@
   def PseudoVMV8R_V : VPseudo<VMV8R_V, V_M8, VRM8>;
 }

+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
+  def PseudoReadVLENB : Pseudo<(outs GPR:$rd), (ins), []>;
+}
+
 //===----------------------------------------------------------------------===//
 // 6. Configuration-Setting Instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
--- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -14,6 +14,7 @@
 #include "RISCV.h"
 #include "RISCVSubtarget.h"
 #include "MCTargetDesc/RISCVMCExpr.h"
+#include "Utils/RISCVBaseInfo.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -200,4 +201,13 @@
     if (LowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
       OutMI.addOperand(MCOp);
   }
+
+  if (OutMI.getOpcode() == RISCV::PseudoReadVLENB) {
+    OutMI.setOpcode(RISCV::CSRRS);
+    OutMI.addOperand(MCOperand::createImm(
+        RISCVSysReg::lookupSysRegByName("VLENB")->Encoding));
+    OutMI.addOperand(MCOperand::createReg(RISCV::X0));
+    return;
+  }
+
 }
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -158,6 +158,7 @@
   MachineInstr &MI = *II;
   MachineFunction &MF = *MI.getParent()->getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const RISCVInstrInfo *TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
   DebugLoc DL = MI.getDebugLoc();
@@ -166,8 +167,17 @@
   Register FrameReg;
   int Offset = getFrameLowering(MF)
                    ->getFrameIndexReference(MF, FrameIndex, FrameReg)
-                   .getFixed() +
-               MI.getOperand(FIOperandNum + 1).getImm();
+                   .getFixed();
+  bool NeedsIndirectAddressing = false;
+  bool IsRVV = RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()) != nullptr;
+
+  // We store the base addresses of RVV frame objects into the frame. Load the
+  // base address first, then access the RVV object through it.
+  if (IsRVV && MI.mayLoadOrStore())
+    NeedsIndirectAddressing =
+        MFI.getStackID(FrameIndex) == TargetStackID::RISCVVector;
+  else
+    Offset += MI.getOperand(FIOperandNum + 1).getImm();

   if (!isInt<32>(Offset)) {
     report_fatal_error(
@@ -175,6 +185,21 @@
   }

   MachineBasicBlock &MBB = *MI.getParent();
+  if (NeedsIndirectAddressing) {
+    MachineOperand &SlotAddr = MI.getOperand(FIOperandNum);
+    Register RVVBaseReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+    const auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+    unsigned LoadOpcode = Subtarget.is64Bit() ?
RISCV::LD : RISCV::LW; + MachineInstr *LoadRVVBase = + BuildMI(MBB, II, DL, TII->get(LoadOpcode), RVVBaseReg) + .add(SlotAddr) + .addImm(0); + SlotAddr.ChangeToRegister(RVVBaseReg, false, false, /*isKill=*/true); + + // Resolve FrameIndex in `LD/LW RVVBaseReg, FI, 0` + return eliminateFrameIndex(LoadRVVBase, /*SPAdj=*/0, 1, RS); + } + bool FrameRegIsKill = false; if (!isInt<12>(Offset)) { @@ -193,7 +218,8 @@ MI.getOperand(FIOperandNum) .ChangeToRegister(FrameReg, false, false, FrameRegIsKill); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + if (!(IsRVV && MI.mayLoadOrStore())) + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td --- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td +++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td @@ -371,4 +371,4 @@ def : SysReg<"vxrm", 0x00A>; def : SysReg<"vl", 0xC20>; def : SysReg<"vtype", 0xC21>; -def : SysReg<"vlenb", 0xC22>; +def VLENB : SysReg<"vlenb", 0xC22>; diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -0,0 +1,482 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +define void @lmul1() nounwind { +; CHECK-LABEL: lmul1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v = alloca + ret void +} + +define void @lmul2() nounwind { +; CHECK-LABEL: lmul2: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v = alloca + ret void +} + +define void @lmul4() nounwind { +; CHECK-LABEL: lmul4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v = alloca + ret void +} + +define void @lmul8() nounwind { +; CHECK-LABEL: lmul8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub 
sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v = alloca + ret void +} + +define void @lmul1_and_2() nounwind { +; CHECK-LABEL: lmul1_and_2: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul2_and_4() nounwind { +; CHECK-LABEL: lmul2_and_4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul1_and_4() nounwind { +; CHECK-LABEL: lmul1_and_4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul2_and_1() nounwind { +; CHECK-LABEL: lmul2_and_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul4_and_1() nounwind { +; CHECK-LABEL: lmul4_and_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; 
CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul4_and_2() nounwind { +; CHECK-LABEL: lmul4_and_2: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul4_and_2_x2_0() nounwind { +; CHECK-LABEL: lmul4_and_2_x2_0: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -48(s0) +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -40(s0) +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + %v3 = alloca + %v4 = alloca + ret void +} + +define void @lmul4_and_2_x2_1() nounwind { +; CHECK-LABEL: lmul4_and_2_x2_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -40(s0) +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -48(s0) +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret + %v1 = alloca + %v3 = alloca + %v2 = alloca + %v4 = alloca + ret void +} + + +define void @gpr_and_lmul1_and_2() nounwind { +; CHECK-LABEL: gpr_and_lmul1_and_2: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -40(s0) +; CHECK-NEXT: addi a0, zero, 3 +; CHECK-NEXT: sd a0, -24(s0) +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret + %x1 = alloca i64 + %v1 = alloca + %v2 = alloca + store volatile i64 3, i64* %x1 + ret void +} + +define void @gpr_and_lmul1_and_4() nounwind { +; CHECK-LABEL: gpr_and_lmul1_and_4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -40(s0) +; CHECK-NEXT: addi a0, zero, 3 +; CHECK-NEXT: sd a0, -24(s0) +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret + %x1 = alloca i64 + %v1 = alloca + %v2 = alloca + store volatile i64 3, i64* %x1 + ret void +} + +define void @lmul_1_2_4_8() nounwind { +; CHECK-LABEL: lmul_1_2_4_8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -40(s0) +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -48(s0) +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + %v4 = alloca + %v8 = alloca + ret void +} + +define void @lmul_1_2_4_8_x2_0() nounwind { +; CHECK-LABEL: lmul_1_2_4_8_x2_0: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 80 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -40(s0) +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -48(s0) +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -56(s0) +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -64(s0) +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -72(s0) +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -80(s0) +; CHECK-NEXT: addi sp, s0, -80 +; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 80 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + %v3 = alloca + %v4 = alloca + %v5 = alloca + %v6 = alloca + %v7 = alloca + %v8 = alloca + ret void +} + +define void @lmul_1_2_4_8_x2_1() nounwind { +; CHECK-LABEL: lmul_1_2_4_8_x2_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -80 +; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 80 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -72(s0) +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -80(s0) +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -56(s0) +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -64(s0) +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -40(s0) +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: sd sp, -48(s0) +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: addi sp, s0, -80 +; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 72(sp) # 8-byte 
Folded Reload +; CHECK-NEXT: addi sp, sp, 80 +; CHECK-NEXT: ret + %v8 = alloca + %v7 = alloca + %v6 = alloca + %v5 = alloca + %v4 = alloca + %v3 = alloca + %v2 = alloca + %v1 = alloca + ret void +} + +define void @masks() nounwind { +; CHECK-LABEL: masks: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -48 +; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 48 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -24(s0) +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -32(s0) +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -40(s0) +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: sd sp, -48(s0) +; CHECK-NEXT: addi sp, s0, -48 +; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 48 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + %v4 = alloca + %v8 = alloca + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/localvar.ll b/llvm/test/CodeGen/RISCV/rvv/localvar.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/localvar.ll @@ -0,0 +1,211 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IV + +define void @local_var_mf8() { +; RV64IV-LABEL: local_var_mf8: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -32 +; RV64IV-NEXT: .cfi_def_cfa_offset 32 +; RV64IV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 32 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -24(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -32(s0) +; RV64IV-NEXT: vsetvli a0, zero, e8,mf8,ta,mu +; RV64IV-NEXT: ld a0, -24(s0) +; RV64IV-NEXT: vle8.v v25, (a0) +; RV64IV-NEXT: ld a0, -32(s0) +; RV64IV-NEXT: vle8.v v25, (a0) +; RV64IV-NEXT: addi sp, s0, -32 +; RV64IV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 32 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define void @local_var_m1() { +; RV64IV-LABEL: local_var_m1: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -32 +; RV64IV-NEXT: .cfi_def_cfa_offset 32 +; RV64IV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 32 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -24(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -32(s0) +; RV64IV-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; RV64IV-NEXT: ld a0, -24(s0) +; RV64IV-NEXT: vle8.v v25, (a0) +; RV64IV-NEXT: ld a0, -32(s0) +; RV64IV-NEXT: vle8.v v25, (a0) +; RV64IV-NEXT: addi sp, s0, -32 +; RV64IV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 32 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define void @local_var_m2() { +; RV64IV-LABEL: local_var_m2: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -32 +; RV64IV-NEXT: .cfi_def_cfa_offset 
32 +; RV64IV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 32 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -24(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -32(s0) +; RV64IV-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; RV64IV-NEXT: ld a0, -24(s0) +; RV64IV-NEXT: vle8.v v26, (a0) +; RV64IV-NEXT: ld a0, -32(s0) +; RV64IV-NEXT: vle8.v v26, (a0) +; RV64IV-NEXT: addi sp, s0, -32 +; RV64IV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 32 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define void @local_var_m4() { +; RV64IV-LABEL: local_var_m4: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -32 +; RV64IV-NEXT: .cfi_def_cfa_offset 32 +; RV64IV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 32 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 2 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -24(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -32(s0) +; RV64IV-NEXT: vsetvli a0, zero, e8,m4,ta,mu +; RV64IV-NEXT: ld a0, -24(s0) +; RV64IV-NEXT: vle8.v v28, (a0) +; RV64IV-NEXT: ld a0, -32(s0) +; RV64IV-NEXT: vle8.v v28, (a0) +; RV64IV-NEXT: addi sp, s0, -32 +; RV64IV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 32 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define void @local_var_m8() { +; RV64IV-LABEL: local_var_m8: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -32 +; RV64IV-NEXT: .cfi_def_cfa_offset 32 +; RV64IV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 32 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 3 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -24(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -32(s0) +; RV64IV-NEXT: vsetvli a0, zero, e8,m8,ta,mu +; RV64IV-NEXT: ld a0, -24(s0) +; RV64IV-NEXT: vle8.v v8, (a0) +; RV64IV-NEXT: ld a0, -32(s0) +; RV64IV-NEXT: vle8.v v8, (a0) +; RV64IV-NEXT: addi sp, s0, -32 +; RV64IV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 32 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define void @local_var_m2_mix_local_scalar() { +; RV64IV-LABEL: local_var_m2_mix_local_scalar: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -48 +; RV64IV-NEXT: .cfi_def_cfa_offset 48 +; RV64IV-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 48 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 +; RV64IV-NEXT: sub sp, sp, a0 +; 
RV64IV-NEXT: sd sp, -32(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -40(s0) +; RV64IV-NEXT: lw a0, -20(s0) +; RV64IV-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; RV64IV-NEXT: ld a0, -32(s0) +; RV64IV-NEXT: vle8.v v26, (a0) +; RV64IV-NEXT: ld a0, -40(s0) +; RV64IV-NEXT: vle8.v v26, (a0) +; RV64IV-NEXT: lw a0, -24(s0) +; RV64IV-NEXT: addi sp, s0, -48 +; RV64IV-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 48 +; RV64IV-NEXT: ret + %local_scalar0 = alloca i32 + %local0 = alloca + %local1 = alloca + %local_scalar1 = alloca i32 + load volatile i32, i32* %local_scalar0 + load volatile , * %local0 + load volatile , * %local1 + load volatile i32, i32* %local_scalar1 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args-with-scalar.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args-with-scalar.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/memory-args-with-scalar.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -O2 < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IV + +declare @llvm.riscv.vadd.nxv64i8.nxv64i8( + , + , + i64); + +define @callee( %arg0, %arg1, %arg2, i64 %vl, i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7) { +; RV64IV-LABEL: callee: +; RV64IV: # %bb.0: +; RV64IV-NEXT: vsetvli t0, zero, e8,m8,ta,mu +; RV64IV-NEXT: vle8.v v8, (a1) +; RV64IV-NEXT: vle8.v v24, (a0) +; RV64IV-NEXT: ld t0, 16(sp) +; RV64IV-NEXT: ld a1, 0(sp) +; RV64IV-NEXT: ld a0, 8(sp) +; RV64IV-NEXT: add a3, a3, a4 +; RV64IV-NEXT: add a4, a5, a6 +; RV64IV-NEXT: add a1, a7, a1 +; RV64IV-NEXT: add a0, a0, t0 +; RV64IV-NEXT: add a3, a3, a4 +; RV64IV-NEXT: add a0, a1, a0 +; RV64IV-NEXT: add a0, a3, a0 +; RV64IV-NEXT: add a0, a2, a0 +; RV64IV-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; RV64IV-NEXT: vadd.vv v16, v16, v24 +; RV64IV-NEXT: vadd.vv v16, v16, v8 +; RV64IV-NEXT: ret + %localsum0 = add i64 %a0, %a1 + %localsum1 = add i64 %a2, %a3 + %localsum2 = add i64 %a4, %a5 + %localsum3 = add i64 %a6, %a7 + %localsum4 = add i64 %localsum0, %localsum1 + %localsum5 = add i64 %localsum2, %localsum3 + %localsum6 = add i64 %localsum4, %localsum5 + %avl = add i64 %vl, %localsum6 + %tmp = call @llvm.riscv.vadd.nxv64i8.nxv64i8( + %arg0, + %arg1, i64 %avl) + %ret = call @llvm.riscv.vadd.nxv64i8.nxv64i8( + %tmp, + %arg2, i64 %avl) + ret %ret +} + +define @caller() { +; RV64IV-LABEL: caller: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -128 +; RV64IV-NEXT: .cfi_def_cfa_offset 128 +; RV64IV-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 128 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 3 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -96(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -104(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -112(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -120(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -128(s0) +; RV64IV-NEXT: vsetvli a0, zero, e8,m8,ta,mu +; RV64IV-NEXT: ld a0, -96(s0) +; RV64IV-NEXT: vle8.v v16, (a0) +; RV64IV-NEXT: ld a0, -104(s0) +; RV64IV-NEXT: vle8.v v8, (a0) +; RV64IV-NEXT: ld a0, -112(s0) +; RV64IV-NEXT: vle8.v v24, (a0) +; RV64IV-NEXT: ld a2, -24(s0) +; RV64IV-NEXT: ld a3, -32(s0) +; RV64IV-NEXT: ld 
a4, -40(s0) +; RV64IV-NEXT: ld a5, -48(s0) +; RV64IV-NEXT: ld a6, -56(s0) +; RV64IV-NEXT: ld a7, -64(s0) +; RV64IV-NEXT: ld t0, -72(s0) +; RV64IV-NEXT: ld a1, -80(s0) +; RV64IV-NEXT: ld a0, -88(s0) +; RV64IV-NEXT: addi sp, sp, -32 +; RV64IV-NEXT: sd a0, 16(sp) +; RV64IV-NEXT: sd a1, 8(sp) +; RV64IV-NEXT: sd t0, 0(sp) +; RV64IV-NEXT: ld a0, -128(s0) +; RV64IV-NEXT: vse8.v v24, (a0) +; RV64IV-NEXT: addi a0, s0, -120 +; RV64IV-NEXT: addi a1, s0, -128 +; RV64IV-NEXT: ld t0, -120(s0) +; RV64IV-NEXT: vse8.v v8, (t0) +; RV64IV-NEXT: call callee +; RV64IV-NEXT: addi sp, sp, 32 +; RV64IV-NEXT: addi sp, s0, -128 +; RV64IV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 128 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + %local2 = alloca + %local3 = alloca i64 + %a0_addr = alloca i64 + %a1_addr = alloca i64 + %a2_addr = alloca i64 + %a3_addr = alloca i64 + %a4_addr = alloca i64 + %a5_addr = alloca i64 + %a6_addr = alloca i64 + %a7_addr = alloca i64 + %arg0 = load volatile , * %local0 + %arg1 = load volatile , * %local1 + %arg2 = load volatile , * %local2 + %vl = load volatile i64, i64* %local3 + %a0 = load volatile i64, i64* %a0_addr + %a1 = load volatile i64, i64* %a1_addr + %a2 = load volatile i64, i64* %a2_addr + %a3 = load volatile i64, i64* %a3_addr + %a4 = load volatile i64, i64* %a4_addr + %a5 = load volatile i64, i64* %a5_addr + %a6 = load volatile i64, i64* %a6_addr + %a7 = load volatile i64, i64* %a7_addr + %ret = call @callee( %arg0, + %arg1, + %arg2, + i64 %vl, + i64 %a0, i64 %a1, i64 %a2, i64 %a3, + i64 %a4, i64 %a5, i64 %a6, i64 %a7) + ret %ret +} diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -O2 < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IV + +declare @llvm.riscv.vadd.nxv64i8.nxv64i8( + , + , + i64); + +define @callee( %arg0, %arg1) { +; RV64IV-LABEL: callee: +; RV64IV: # %bb.0: +; RV64IV-NEXT: vsetvli a1, zero, e8,m8,ta,mu +; RV64IV-NEXT: vle8.v v8, (a0) +; RV64IV-NEXT: addi a0, zero, 1024 +; RV64IV-NEXT: vsetvli a0, a0, e8,m8,ta,mu +; RV64IV-NEXT: vadd.vv v16, v16, v8 +; RV64IV-NEXT: ret + %ret = call @llvm.riscv.vadd.nxv64i8.nxv64i8( + %arg0, + %arg1, i64 1024) + ret %ret +} + +define @caller() { +; RV64IV-LABEL: caller: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -48 +; RV64IV-NEXT: .cfi_def_cfa_offset 48 +; RV64IV-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 48 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 3 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -24(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -32(s0) +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: sd sp, -40(s0) +; RV64IV-NEXT: vsetvli a0, zero, e8,m8,ta,mu +; RV64IV-NEXT: ld a0, -24(s0) +; RV64IV-NEXT: vle8.v v16, (a0) +; RV64IV-NEXT: ld a0, -32(s0) +; RV64IV-NEXT: vle8.v v8, (a0) +; RV64IV-NEXT: addi a0, s0, -40 +; RV64IV-NEXT: ld a1, -40(s0) +; RV64IV-NEXT: vse8.v v8, (a1) +; RV64IV-NEXT: call callee +; RV64IV-NEXT: addi sp, s0, -48 +; RV64IV-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 40(sp) # 8-byte 
Folded Reload +; RV64IV-NEXT: addi sp, sp, 48 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + %arg0 = load volatile , * %local0 + %arg1 = load volatile , * %local1 + %ret = call @callee( %arg0, + %arg1) + ret %ret +} diff --git a/llvm/test/CodeGen/RISCV/stack-realignment-with-scalable-vector-objects.ll b/llvm/test/CodeGen/RISCV/stack-realignment-with-scalable-vector-objects.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/stack-realignment-with-scalable-vector-objects.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I + +declare void @callee(*, i32*) + +define void @caller() { +; RV32I-LABEL: caller: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -128 +; RV32I-NEXT: .cfi_def_cfa_offset 128 +; RV32I-NEXT: sw ra, 124(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 120(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: addi s0, sp, 128 +; RV32I-NEXT: .cfi_def_cfa s0, 0 +; RV32I-NEXT: andi sp, sp, -64 +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: sw sp, -68(s0) +; RV32I-NEXT: addi a0, s0, -68 +; RV32I-NEXT: addi a1, s0, -64 +; RV32I-NEXT: call callee@plt +; RV32I-NEXT: addi sp, s0, -128 +; RV32I-NEXT: lw s0, 120(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw ra, 124(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -128 +; RV64I-NEXT: .cfi_def_cfa_offset 128 +; RV64I-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: addi s0, sp, 128 +; RV64I-NEXT: .cfi_def_cfa s0, 0 +; RV64I-NEXT: andi sp, sp, -64 +; RV64I-NEXT: csrr a0, vlenb +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: sd sp, -72(s0) +; RV64I-NEXT: addi a0, s0, -72 +; RV64I-NEXT: addi a1, s0, -64 +; RV64I-NEXT: call callee@plt +; RV64I-NEXT: addi sp, s0, -128 +; RV64I-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 128 +; RV64I-NEXT: ret + %1 = alloca + %2 = alloca i32, align 64 + call void @callee(* %1, i32 *%2) + ret void +}