diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -65,6 +65,9 @@ bool canUseAsPrologue(const MachineBasicBlock &MBB) const override; bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; + bool isSupportedStackID(TargetStackID::Value ID) const override; + TargetStackID::Value getStackIDForScalableVectors() const override; + protected: const RISCVSubtarget &STI; @@ -73,6 +76,10 @@ void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register DestReg, Register SrcReg, int64_t Val, MachineInstr::MIFlag Flag) const; + void adjustStackForRVV(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, const DebugLoc &DL, + int64_t Amount) const; + int64_t assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const; }; } #endif diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -242,10 +242,6 @@ // Get the alignment. Align StackAlign = getStackAlign(); - // Set Max Call Frame Size - uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign); - MFI.setMaxCallFrameSize(MaxCallSize); - // Make sure the frame is aligned. 
FrameSize = alignTo(FrameSize, StackAlign); @@ -293,16 +289,43 @@ static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; } static SmallVector -getNonLibcallCSI(const std::vector &CSI) { +getNonLibcallCSI(const MachineFunction &MF, + const std::vector &CSI) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); SmallVector NonLibcallCSI; - for (auto &CS : CSI) - if (CS.getFrameIdx() >= 0) + for (auto &CS : CSI) { + int FI = CS.getFrameIdx(); + if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::Default) NonLibcallCSI.push_back(CS); + } return NonLibcallCSI; } +void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, + int64_t Amount) const { + assert(Amount != 0 && "Did not need to adjust stack pointer for RVV."); + + const RISCVInstrInfo *TII = STI.getInstrInfo(); + Register SPReg = getSPReg(STI); + unsigned Opc = RISCV::ADD; + if (Amount < 0) { + Amount = -Amount; + Opc = RISCV::SUB; + } + + // 1. Multiply the number of v-slots to the length of registers + Register FactorRegister = TII->getVLENFactoredAmount(MF, MBB, MBBI, Amount); + // 2. SP = SP - RVV stack size + BuildMI(MBB, MBBI, DL, TII->get(Opc), SPReg) + .addReg(SPReg) + .addReg(FactorRegister); +} + void RISCVFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -364,9 +387,10 @@ // investigation. Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI.getStackSize(); uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize(); + uint64_t RVVStackSize = RVFI->getRVVStackSize(); // Early exit if there is no need to allocate on the stack - if (RealStackSize == 0 && !MFI.adjustsStack()) + if (RealStackSize == 0 && !MFI.adjustsStack() && RVVStackSize == 0) return; // If the stack pointer has been marked as reserved, then produce an error if @@ -399,7 +423,7 @@ // to the stack, not before. 
// FIXME: assumes exactly one instruction is used to save each callee-saved // register. - std::advance(MBBI, getNonLibcallCSI(CSI).size()); + std::advance(MBBI, getNonLibcallCSI(MF, CSI).size()); // Iterate over list of callee-saved registers and emit .cfi_offset // directives. @@ -489,6 +513,9 @@ } } } + + if (RVVStackSize) + adjustStackForRVV(MF, MBB, MBBI, DL, -RVVStackSize); } void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, @@ -526,7 +553,7 @@ --MBBI; } - const auto &CSI = getNonLibcallCSI(MFI.getCalleeSavedInfo()); + const auto &CSI = getNonLibcallCSI(MF, MFI.getCalleeSavedInfo()); // Skip to before the restores of callee-saved registers // FIXME: assumes exactly one instruction is used to restore each @@ -538,6 +565,7 @@ uint64_t StackSize = MFI.getStackSize(); uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize(); uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize(); + uint64_t RVVStackSize = RVFI->getRVVStackSize(); // Restore the stack pointer using the value of the frame pointer. Only // necessary if the stack pointer was modified, meaning the stack size is @@ -546,6 +574,9 @@ assert(hasFP(MF) && "frame pointer should not have been eliminated"); adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg, -FPOffset, MachineInstr::FrameDestroy); + } else { + if (RVVStackSize) + adjustStackForRVV(MF, MBB, LastFrameDestroy, DL, RVVStackSize); } uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); @@ -578,12 +609,22 @@ // Callee-saved registers should be referenced relative to the stack // pointer (positive offset), otherwise use the frame pointer (negative // offset). 
- const auto &CSI = getNonLibcallCSI(MFI.getCalleeSavedInfo()); + const auto &CSI = getNonLibcallCSI(MF, MFI.getCalleeSavedInfo()); int MinCSFI = 0; int MaxCSFI = -1; - - int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + - MFI.getOffsetAdjustment(); + StackOffset Offset; + auto StackID = MFI.getStackID(FI); + + assert((StackID == TargetStackID::Default || + StackID == TargetStackID::ScalableVector) && + "Unexpected stack ID for the frame object."); + if (StackID == TargetStackID::Default) { + Offset = + StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + + MFI.getOffsetAdjustment()); + } else if (StackID == TargetStackID::ScalableVector) { + Offset = StackOffset::getScalable(MFI.getObjectOffset(FI)); + } uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF); @@ -596,33 +637,86 @@ FrameReg = RISCV::X2; if (FirstSPAdjustAmount) - Offset += FirstSPAdjustAmount; + Offset += StackOffset::getFixed(FirstSPAdjustAmount); else - Offset += MFI.getStackSize(); + Offset += StackOffset::getFixed(MFI.getStackSize()); } else if (RI->needsStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { // If the stack was realigned, the frame pointer is set in order to allow // SP to be restored, so we need another base register to record the stack // after realignment. 
- if (hasBP(MF)) + if (hasBP(MF)) { FrameReg = RISCVABI::getBPReg(); - else + // |--------------------------| -- <-- FP + // | callee-saved registers | | + // |--------------------------| | MFI.getStackSize() + // | scalar local variables | | + // |--------------------------| -- + // | Realignment | | + // |--------------------------| -- <-- BP + // | RVV objects | | RVFI->getRVVStackSize() + // |--------------------------| -- + // | VarSize objects | | + // |--------------------------| -- <-- SP + } else { FrameReg = RISCV::X2; - Offset += MFI.getStackSize(); - if (FI < 0) - Offset += RVFI->getLibCallStackSize(); + // When using SP to access frame objects, we need to add RVV stack size. + // + // |--------------------------| -- <-- FP + // | callee-saved registers | | + // |--------------------------| | MFI.getStackSize() + // | scalar local variables | | + // |--------------------------| -- + // | Realignment | | + // |--------------------------| -- + // | RVV objects | | RVFI->getRVVStackSize() + // |--------------------------| -- <-- SP + Offset += StackOffset::getScalable(RVFI->getRVVStackSize()); + } + if (MFI.getStackID(FI) == TargetStackID::Default) { + Offset += StackOffset::getFixed(MFI.getStackSize()); + if (FI < 0) + Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); + } } else { FrameReg = RI->getFrameRegister(MF); if (hasFP(MF)) { - Offset += RVFI->getVarArgsSaveSize(); + Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize()); if (FI >= 0) - Offset -= RVFI->getLibCallStackSize(); + Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize()); + // When using FP to access scalable vector objects, we need to minus + // the frame size. + // + // |--------------------------| -- <-- FP + // | callee-saved registers | | + // |--------------------------| | MFI.getStackSize() + // | scalar local variables | | + // |--------------------------| -- (Offset of RVV objects is from here.) 
+ // | RVV objects | + // |--------------------------| + // | VarSize objects | + // |--------------------------| <-- SP + if (MFI.getStackID(FI) == TargetStackID::ScalableVector) + Offset -= StackOffset::getFixed(MFI.getStackSize()); } else { - Offset += MFI.getStackSize(); - if (FI < 0) - Offset += RVFI->getLibCallStackSize(); + // When using SP to access frame objects, we need to add RVV stack size. + // + // |--------------------------| -- <-- FP + // | callee-saved registers | | + // |--------------------------| | MFI.getStackSize() + // | scalar local variables | | + // |--------------------------| -- + // | RVV objects | | RVFI->getRVVStackSize() + // |--------------------------| -- <-- SP + Offset += StackOffset::getScalable(RVFI->getRVVStackSize()); + if (MFI.getStackID(FI) == TargetStackID::Default) { + Offset += StackOffset::getFixed(MFI.getStackSize()); + if (FI < 0) + Offset += StackOffset::getFixed(RVFI->getLibCallStackSize()); + } } } - return StackOffset::getFixed(Offset); + + return Offset; } void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, @@ -670,6 +764,37 @@ } } +int64_t +RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const { + int64_t Offset = 0; + // Create a buffer of RVV objects to allocate. + SmallVector ObjectsToAllocate; + for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) { + unsigned StackID = MFI.getStackID(I); + if (StackID != TargetStackID::ScalableVector) + continue; + if (MFI.isDeadObjectIndex(I)) + continue; + + ObjectsToAllocate.push_back(I); + } + + // Allocate all RVV locals and spills + for (int FI : ObjectsToAllocate) { + // ObjectSize in bytes. + int64_t ObjectSize = MFI.getObjectSize(FI); + // If the data type is the fractional vector type, reserve one vector + // register for it. + if (ObjectSize < 8) + ObjectSize = 8; + // Currently, all scalable vector types are aligned to 8 bytes. 
+ Offset = alignTo(Offset + ObjectSize, 8); + MFI.setObjectOffset(FI, -Offset); + } + + return Offset; +} + void RISCVFrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); @@ -686,6 +811,10 @@ RegInfo->getSpillAlign(*RC), false); RS->addScavengingFrameIndex(RegScavFI); } + + auto *RVFI = MF.getInfo(); + int64_t RVVStackSize = assignRVVStackObjectOffsets(MFI); + RVFI->setRVVStackSize(RVVStackSize); } // Not preserve stack space within prologue for outgoing variables when the @@ -786,7 +915,7 @@ } // Manually spill values not spilled by libcall. - const auto &NonLibcallCSI = getNonLibcallCSI(CSI); + const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); for (auto &CS : NonLibcallCSI) { // Insert the spill to the stack frame. Register Reg = CS.getReg(); @@ -811,7 +940,7 @@ // Manually restore values not restored by libcall. Insert in reverse order. // loadRegFromStackSlot can insert multiple instructions. - const auto &NonLibcallCSI = getNonLibcallCSI(CSI); + const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); for (auto &CS : reverse(NonLibcallCSI)) { Register Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); @@ -883,3 +1012,19 @@ // replacing the successor with our own tail return at the end of our block. 
return SuccMBB->isReturnBlock() && SuccMBB->size() == 1;
 }
+
+bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
+  switch (ID) {
+  case TargetStackID::Default:
+  case TargetStackID::ScalableVector:
+    return true;
+  case TargetStackID::NoAlloc:
+  case TargetStackID::SGPRSpill:
+    return false;
+  }
+  llvm_unreachable("Invalid TargetStackID::Value");
+}
+
+TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
+  return TargetStackID::ScalableVector;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -133,6 +133,11 @@
   insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator &It, MachineFunction &MF,
                      const outliner::Candidate &C) const override;
+
+  Register getVLENFactoredAmount(MachineFunction &MF, MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator II,
+                                 int64_t Amount) const;
+
 protected:
   const RISCVSubtarget &STI;
 };
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -879,3 +879,50 @@
                         RISCVII::MO_CALL));
   return It;
 }
+
+/// Build, at insertion point \p II, the instructions that compute
+/// VLENB * (\p Amount / 8) and return the virtual register holding the
+/// result. \p Amount must be a positive multiple of 8.
+Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
+                                               MachineBasicBlock &MBB,
+                                               MachineBasicBlock::iterator II,
+                                               int64_t Amount) const {
+  assert(Amount > 0 && "There is no need to get VLEN scaled value.");
+  assert(Amount % 8 == 0 &&
+         "Reserve the stack by the multiple of one vector size.");
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const RISCVInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  // II may be MBB.end() when code is inserted at the end of a block; it must
+  // not be dereferenced in that case.
+  DebugLoc DL = II != MBB.end() ? II->getDebugLoc() : DebugLoc();
+  int64_t NumOfVReg = Amount / 8;
+
+  Register SizeOfVector = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+  BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), SizeOfVector);
+  assert(isInt<12>(NumOfVReg) &&
+         "Expect the number of vector registers within 12-bits.");
+  // A factor of one needs no scaling: the VLENB value itself is the result.
+  if (NumOfVReg == 1)
+    return SizeOfVector;
+
+  // Only create the result register once we know it is going to be defined.
+  Register FactorRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+  if (isPowerOf2_32(NumOfVReg)) {
+    BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), FactorRegister)
+        .addReg(SizeOfVector)
+        .addImm(Log2_32(NumOfVReg));
+  } else {
+    Register VN = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+    BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), VN)
+        .addReg(RISCV::X0)
+        .addImm(NumOfVReg);
+    assert(MF.getSubtarget().hasStdExtM() &&
+           "M-extension must be enabled to calculate the vscaled size/offset.");
+    BuildMI(MBB, II, DL, TII->get(RISCV::MUL), FactorRegister)
+        .addReg(SizeOfVector)
+        .addReg(VN, RegState::Kill);
+  }
+
+  return FactorRegister;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -32,6 +32,8 @@
   int MoveF64FrameIndex = -1;
   /// Size of any opaque stack adjustment due to save/restore libcalls.
   unsigned LibCallStackSize = 0;
+  /// Size of RVV stack.
+ uint64_t RVVStackSize = 0; public: RISCVMachineFunctionInfo(const MachineFunction &MF) {} @@ -58,6 +60,9 @@ return MF.getSubtarget().enableSaveRestore() && VarArgsSaveSize == 0 && !MF.getFrameInfo().hasTailCall(); } + + uint64_t getRVVStackSize() const { return RVVStackSize; } + void setRVVStackSize(uint64_t Size) { RVVStackSize = Size; } }; } // end namespace llvm diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -164,12 +164,13 @@ int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); Register FrameReg; - int Offset = getFrameLowering(MF) - ->getFrameIndexReference(MF, FrameIndex, FrameReg) - .getFixed() + - MI.getOperand(FIOperandNum + 1).getImm(); + StackOffset Offset = + getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg); + const auto *RVVInfo = RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()); + if (!RVVInfo) + Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); - if (!isInt<32>(Offset)) { + if (!isInt<32>(Offset.getFixed())) { report_fatal_error( "Frame offsets outside of the signed 32-bit range not supported"); } @@ -177,23 +178,67 @@ MachineBasicBlock &MBB = *MI.getParent(); bool FrameRegIsKill = false; - if (!isInt<12>(Offset)) { - assert(isInt<32>(Offset) && "Int32 expected"); + if (!isInt<12>(Offset.getFixed())) { // The offset won't fit in an immediate, so use a scratch register instead // Modify Offset and FrameReg appropriately Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - TII->movImm(MBB, II, DL, ScratchReg, Offset); + TII->movImm(MBB, II, DL, ScratchReg, Offset.getFixed()); BuildMI(MBB, II, DL, TII->get(RISCV::ADD), ScratchReg) .addReg(FrameReg) .addReg(ScratchReg, RegState::Kill); - Offset = 0; + Offset = StackOffset::get(0, Offset.getScalable()); FrameReg = ScratchReg; FrameRegIsKill = true; } - 
MI.getOperand(FIOperandNum)
-      .ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
-  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+  if (!Offset.getScalable()) {
+    // Offset = (fixed offset, 0): always replace the frame index by FrameReg.
+    MI.getOperand(FIOperandNum)
+        .ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
+    if (!RVVInfo) {
+      MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
+    } else {
+      if (Offset.getFixed()) {
+        Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+        BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), ScratchReg)
+            .addReg(FrameReg)
+            .addImm(Offset.getFixed());
+        MI.getOperand(FIOperandNum)
+            .ChangeToRegister(ScratchReg, false, false, true);
+      }
+    }
+  } else {
+    // Offset = (fixed offset, scalable offset)
+    unsigned Opc = RISCV::ADD;
+    int64_t ScalableValue = Offset.getScalable();
+    if (ScalableValue < 0) {
+      ScalableValue = -ScalableValue;
+      Opc = RISCV::SUB;
+    }
+
+    // 1. Get vlenb && multiply vlen with number of vector register.
+    Register FactorRegister =
+        TII->getVLENFactoredAmount(MF, MBB, II, ScalableValue);
+
+    // 2. Calculate address: FrameReg + result of multiply
+    Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+    BuildMI(MBB, II, DL, TII->get(Opc), VL)
+        .addReg(FrameReg)
+        .addReg(FactorRegister);
+
+    if (RVVInfo && Offset.getFixed()) {
+      // Scalable load/store has no immediate argument. We need to add the
+      // fixed part into the load/store base address.
+      BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), VL)
+          .addReg(VL)
+          .addImm(Offset.getFixed());
+    }
+
+    // 3.
Replace address register with calculated address register + MI.getOperand(FIOperandNum).ChangeToRegister(VL, false, false, true); + if (!RVVInfo) + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); + } } Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -O2 < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IV + +define @access_fixed_object(i64 *%val) { +; RV64IV-LABEL: access_fixed_object: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -528 +; RV64IV-NEXT: .cfi_def_cfa_offset 528 +; RV64IV-NEXT: vsetvli a1, zero, e64,m1,ta,mu +; RV64IV-NEXT: addi a1, sp, 8 +; RV64IV-NEXT: vle64.v v8, (a1) +; RV64IV-NEXT: ld a1, 520(sp) +; RV64IV-NEXT: sd a1, 0(a0) +; RV64IV-NEXT: addi sp, sp, 528 +; RV64IV-NEXT: ret + %local = alloca i64 + %array = alloca [64 x i64] + %vptr = bitcast [64 x i64]* %array to * + %v = load , * %vptr + %len = load i64, i64* %local + store i64 %len, i64* %val + ret %v +} + +declare @llvm.riscv.vadd.nxv1i64.nxv1i64( + , + , + i64); + +define @access_fixed_and_vector_objects(i64 *%val) { +; RV64IV-LABEL: access_fixed_and_vector_objects: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -528 +; RV64IV-NEXT: .cfi_def_cfa_offset 528 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: vsetvli a0, zero, e64,m1,ta,mu +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 8 +; RV64IV-NEXT: vle64.v v25, (a0) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: ld a0, 520(a0) +; RV64IV-NEXT: vle64.v v26, (sp) +; RV64IV-NEXT: vsetvli a0, a0, e64,m1,ta,mu 
+; RV64IV-NEXT: vadd.vv v8, v25, v26 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: addi sp, sp, 528 +; RV64IV-NEXT: ret + %local = alloca i64 + %vector = alloca + %array = alloca [64 x i64] + %vptr = bitcast [64 x i64]* %array to * + %v1 = load , * %vptr + %v2 = load , * %vector + %len = load i64, i64* %local + + %a = call @llvm.riscv.vadd.nxv1i64.nxv1i64( + %v1, + %v2, + i64 %len) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -0,0 +1,394 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +define void @lmul1() nounwind { +; CHECK-LABEL: lmul1: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret + %v = alloca + ret void +} + +define void @lmul2() nounwind { +; CHECK-LABEL: lmul2: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret + %v = alloca + ret void +} + +define void @lmul4() nounwind { +; CHECK-LABEL: lmul4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v = alloca + ret void +} + +define void @lmul8() 
nounwind { +; CHECK-LABEL: lmul8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -64 +; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: ret + %v = alloca + ret void +} + +define void @lmul1_and_2() nounwind { +; CHECK-LABEL: lmul1_and_2: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul2_and_4() nounwind { +; CHECK-LABEL: lmul2_and_4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul1_and_4() nounwind { +; CHECK-LABEL: lmul1_and_4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 5 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; 
CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul2_and_1() nounwind { +; CHECK-LABEL: lmul2_and_1: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul4_and_1() nounwind { +; CHECK-LABEL: lmul4_and_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 5 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul4_and_2() nounwind { +; CHECK-LABEL: lmul4_and_2: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + ret void +} + +define void @lmul4_and_2_x2_0() nounwind { +; CHECK-LABEL: lmul4_and_2_x2_0: +; CHECK: # %bb.0: 
+; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + %v3 = alloca + %v4 = alloca + ret void +} + +define void @lmul4_and_2_x2_1() nounwind { +; CHECK-LABEL: lmul4_and_2_x2_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %v1 = alloca + %v3 = alloca + %v2 = alloca + %v4 = alloca + ret void +} + + +define void @gpr_and_lmul1_and_2() nounwind { +; CHECK-LABEL: gpr_and_lmul1_and_2: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, zero, 3 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: addi a2, zero, 3 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: sd a0, 8(a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 3 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %x1 = alloca i64 + %v1 = alloca + %v2 = alloca + store volatile i64 3, i64* %x1 + ret void +} + +define void 
@gpr_and_lmul1_and_4() nounwind { +; CHECK-LABEL: gpr_and_lmul1_and_4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 32 +; CHECK-NEXT: andi sp, sp, -32 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 5 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, zero, 3 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: addi a2, zero, 5 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: sd a0, 8(a1) +; CHECK-NEXT: addi sp, s0, -32 +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret + %x1 = alloca i64 + %v1 = alloca + %v2 = alloca + store volatile i64 3, i64* %x1 + ret void +} + +define void @lmul_1_2_4_8() nounwind { +; CHECK-LABEL: lmul_1_2_4_8: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 15 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -64 +; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + %v4 = alloca + %v8 = alloca + ret void +} + +define void @lmul_1_2_4_8_x2_0() nounwind { +; CHECK-LABEL: lmul_1_2_4_8_x2_0: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 30 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -64 +; 
CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + %v3 = alloca + %v4 = alloca + %v5 = alloca + %v6 = alloca + %v7 = alloca + %v8 = alloca + ret void +} + +define void @lmul_1_2_4_8_x2_1() nounwind { +; CHECK-LABEL: lmul_1_2_4_8_x2_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi s0, sp, 64 +; CHECK-NEXT: andi sp, sp, -64 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a1, zero, 30 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi sp, s0, -64 +; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: ret + %v8 = alloca + %v7 = alloca + %v6 = alloca + %v5 = alloca + %v4 = alloca + %v3 = alloca + %v2 = alloca + %v1 = alloca + ret void +} + +define void @masks() nounwind { +; CHECK-LABEL: masks: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: ret + %v1 = alloca + %v2 = alloca + %v4 = alloca + %v8 = alloca + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/localvar.ll b/llvm/test/CodeGen/RISCV/rvv/localvar.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/localvar.ll @@ -0,0 +1,287 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IV + +define void @local_var_mf8() { +; RV64IV-LABEL: local_var_mf8: +; RV64IV: # %bb.0: +; RV64IV-NEXT: .cfi_def_cfa_offset 0 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: vsetvli a0, 
zero, e8,mf8,ta,mu +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: vle8.v v25, (a0) +; RV64IV-NEXT: vle8.v v25, (sp) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 +; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define void @local_var_m1() { +; RV64IV-LABEL: local_var_m1: +; RV64IV: # %bb.0: +; RV64IV-NEXT: .cfi_def_cfa_offset 0 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: vsetvli a0, zero, e8,m1,ta,mu +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: vle8.v v25, (a0) +; RV64IV-NEXT: vle8.v v25, (sp) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 +; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define void @local_var_m2() { +; RV64IV-LABEL: local_var_m2: +; RV64IV: # %bb.0: +; RV64IV-NEXT: .cfi_def_cfa_offset 0 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 2 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: vle8.v v26, (a0) +; RV64IV-NEXT: vle8.v v26, (sp) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 2 +; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define void @local_var_m4() { +; RV64IV-LABEL: local_var_m4: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -32 +; RV64IV-NEXT: .cfi_def_cfa_offset 32 +; RV64IV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 32 +; RV64IV-NEXT: 
.cfi_def_cfa s0, 0 +; RV64IV-NEXT: andi sp, sp, -32 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 3 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: vsetvli a0, zero, e8,m4,ta,mu +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 2 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: vle8.v v28, (a0) +; RV64IV-NEXT: vle8.v v28, (sp) +; RV64IV-NEXT: addi sp, s0, -32 +; RV64IV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 32 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define void @local_var_m8() { +; RV64IV-LABEL: local_var_m8: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -64 +; RV64IV-NEXT: .cfi_def_cfa_offset 64 +; RV64IV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 64 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: andi sp, sp, -64 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 4 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: vsetvli a0, zero, e8,m8,ta,mu +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 3 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: vle8.v v8, (a0) +; RV64IV-NEXT: vle8.v v8, (sp) +; RV64IV-NEXT: addi sp, s0, -64 +; RV64IV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 64 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + load volatile , * %local0 + load volatile , * %local1 + ret void +} + +define void @local_var_m2_mix_local_scalar() { +; RV64IV-LABEL: local_var_m2_mix_local_scalar: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -16 +; RV64IV-NEXT: .cfi_def_cfa_offset 16 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 2 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: csrr a0, vlenb +; 
RV64IV-NEXT: slli a0, a0, 2 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: lw a0, 12(a0) +; RV64IV-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: vle8.v v26, (a0) +; RV64IV-NEXT: vle8.v v26, (sp) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 2 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: lw a0, 8(a0) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 2 +; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: addi sp, sp, 16 +; RV64IV-NEXT: ret + %local_scalar0 = alloca i32 + %local0 = alloca + %local1 = alloca + %local_scalar1 = alloca i32 + load volatile i32, i32* %local_scalar0 + load volatile , * %local0 + load volatile , * %local1 + load volatile i32, i32* %local_scalar1 + ret void +} + +define void @local_var_m2_with_varsize_object(i64 %n) { +; RV64IV-LABEL: local_var_m2_with_varsize_object: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -16 +; RV64IV-NEXT: .cfi_def_cfa_offset 16 +; RV64IV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 16 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: csrr a1, vlenb +; RV64IV-NEXT: slli a1, a1, 2 +; RV64IV-NEXT: sub sp, sp, a1 +; RV64IV-NEXT: addi a0, a0, 15 +; RV64IV-NEXT: andi a0, a0, -16 +; RV64IV-NEXT: sub a0, sp, a0 +; RV64IV-NEXT: mv sp, a0 +; RV64IV-NEXT: csrr a1, vlenb +; RV64IV-NEXT: slli a1, a1, 1 +; RV64IV-NEXT: sub a1, s0, a1 +; RV64IV-NEXT: addi a1, a1, -16 +; RV64IV-NEXT: call notdead@plt +; RV64IV-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 +; RV64IV-NEXT: sub a0, s0, a0 +; RV64IV-NEXT: addi a0, a0, -16 +; RV64IV-NEXT: vle8.v v26, (a0) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 2 +; RV64IV-NEXT: sub a0, s0, a0 +; RV64IV-NEXT: addi a0, a0, -16 +; RV64IV-NEXT: vle8.v v26, (a0) +; 
RV64IV-NEXT: addi sp, s0, -16 +; RV64IV-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 16 +; RV64IV-NEXT: ret + %1 = alloca i8, i64 %n + %2 = alloca + %3 = alloca + call void @notdead(i8* %1, * %2) + load volatile , * %2 + load volatile , * %3 + ret void +} + +define void @local_var_m2_with_bp(i64 %n) { +; RV64IV-LABEL: local_var_m2_with_bp: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -256 +; RV64IV-NEXT: .cfi_def_cfa_offset 256 +; RV64IV-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s1, 232(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: .cfi_offset s1, -24 +; RV64IV-NEXT: addi s0, sp, 256 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: andi sp, sp, -128 +; RV64IV-NEXT: mv s1, sp +; RV64IV-NEXT: csrr a1, vlenb +; RV64IV-NEXT: slli a1, a1, 2 +; RV64IV-NEXT: sub sp, sp, a1 +; RV64IV-NEXT: addi a0, a0, 15 +; RV64IV-NEXT: andi a0, a0, -16 +; RV64IV-NEXT: sub a0, sp, a0 +; RV64IV-NEXT: mv sp, a0 +; RV64IV-NEXT: addi a1, s1, 128 +; RV64IV-NEXT: csrr a2, vlenb +; RV64IV-NEXT: slli a2, a2, 1 +; RV64IV-NEXT: sub a2, s1, a2 +; RV64IV-NEXT: mv a2, a2 +; RV64IV-NEXT: call notdead2@plt +; RV64IV-NEXT: lw a0, 124(s1) +; RV64IV-NEXT: vsetvli a0, zero, e8,m2,ta,mu +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 1 +; RV64IV-NEXT: sub a0, s1, a0 +; RV64IV-NEXT: vle8.v v26, (a0) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 2 +; RV64IV-NEXT: sub a0, s1, a0 +; RV64IV-NEXT: vle8.v v26, (a0) +; RV64IV-NEXT: lw a0, 120(s1) +; RV64IV-NEXT: addi sp, s0, -256 +; RV64IV-NEXT: ld s1, 232(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld s0, 240(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 256 +; RV64IV-NEXT: ret + %1 = alloca i8, i64 %n + %2 = alloca i32, align 128 + %local_scalar0 = alloca 
i32 + %local0 = alloca + %local1 = alloca + %local_scalar1 = alloca i32 + call void @notdead2(i8* %1, i32* %2, * %local0) + load volatile i32, i32* %local_scalar0 + load volatile , * %local0 + load volatile , * %local1 + load volatile i32, i32* %local_scalar1 + ret void +} + +declare void @notdead(i8*, *) +declare void @notdead2(i8*, i32*, *) diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -O2 < %s \ +; RUN: | FileCheck %s -check-prefix=RV64IV + +declare @llvm.riscv.vmacc.nxv64i8.nxv64i8( + , + , + , + i64); + +define @callee( %arg0, %arg1, %arg2) { +; RV64IV-LABEL: callee: +; RV64IV: # %bb.0: +; RV64IV-NEXT: vsetvli a1, zero, e8,m8,ta,mu +; RV64IV-NEXT: vle8.v v24, (a0) +; RV64IV-NEXT: addi a0, zero, 1024 +; RV64IV-NEXT: vsetvli a0, a0, e8,m8,tu,mu +; RV64IV-NEXT: vmacc.vv v8, v16, v24 +; RV64IV-NEXT: ret + %ret = call @llvm.riscv.vmacc.nxv64i8.nxv64i8( + %arg0, + %arg1, + %arg2, i64 1024) + ret %ret +} + +define @caller() { +; RV64IV-LABEL: caller: +; RV64IV: # %bb.0: +; RV64IV-NEXT: addi sp, sp, -64 +; RV64IV-NEXT: .cfi_def_cfa_offset 64 +; RV64IV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64IV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64IV-NEXT: .cfi_offset ra, -8 +; RV64IV-NEXT: .cfi_offset s0, -16 +; RV64IV-NEXT: addi s0, sp, 64 +; RV64IV-NEXT: .cfi_def_cfa s0, 0 +; RV64IV-NEXT: andi sp, sp, -64 +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 5 +; RV64IV-NEXT: sub sp, sp, a0 +; RV64IV-NEXT: vsetvli a0, zero, e8,m8,ta,mu +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: addi a1, zero, 24 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: vle8.v v8, (a0) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 4 +; RV64IV-NEXT: add 
a0, sp, a0 +; RV64IV-NEXT: vle8.v v16, (a0) +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 3 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: vle8.v v24, (a0) +; RV64IV-NEXT: mv a0, sp +; RV64IV-NEXT: vse8.v v24, (sp) +; RV64IV-NEXT: call callee@plt +; RV64IV-NEXT: addi sp, s0, -64 +; RV64IV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64IV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64IV-NEXT: addi sp, sp, 64 +; RV64IV-NEXT: ret + %local0 = alloca + %local1 = alloca + %local2 = alloca + %arg0 = load volatile , * %local0 + %arg1 = load volatile , * %local1 + %arg2 = load volatile , * %local2 + %ret = call @callee( %arg0, + %arg1, + %arg2) + ret %ret +}