diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -21,10 +21,10 @@ add_llvm_target(RISCVCodeGen RISCVAsmPrinter.cpp RISCVCallLowering.cpp - RISCVCleanupVSETVLI.cpp RISCVExpandAtomicPseudoInsts.cpp RISCVExpandPseudoInsts.cpp RISCVFrameLowering.cpp + RISCVInsertVSETVLI.cpp RISCVInstrInfo.cpp RISCVInstructionSelector.cpp RISCVISelDAGToDAG.cpp diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -86,7 +86,7 @@ VMConstraint = 0b100, }; -enum VLMUL { +enum VLMUL : uint8_t { LMUL_1 = 0, LMUL_2, LMUL_4, diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -46,8 +46,8 @@ FunctionPass *createRISCVExpandAtomicPseudoPass(); void initializeRISCVExpandAtomicPseudoPass(PassRegistry &); -FunctionPass *createRISCVCleanupVSETVLIPass(); -void initializeRISCVCleanupVSETVLIPass(PassRegistry &); +FunctionPass *createRISCVInsertVSETVLIPass(); +void initializeRISCVInsertVSETVLIPass(PassRegistry &); InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, RISCVSubtarget &, diff --git a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp deleted file mode 100644 --- a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp +++ /dev/null @@ -1,163 +0,0 @@ -//===- RISCVCleanupVSETVLI.cpp - Cleanup unneeded VSETVLI instructions ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements a function pass that removes duplicate vsetvli -// instructions within a basic block. -// -//===----------------------------------------------------------------------===// - -#include "RISCV.h" -#include "RISCVSubtarget.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -using namespace llvm; - -#define DEBUG_TYPE "riscv-cleanup-vsetvli" -#define RISCV_CLEANUP_VSETVLI_NAME "RISCV Cleanup VSETVLI pass" - -namespace { - -class RISCVCleanupVSETVLI : public MachineFunctionPass { -public: - static char ID; - - RISCVCleanupVSETVLI() : MachineFunctionPass(ID) { - initializeRISCVCleanupVSETVLIPass(*PassRegistry::getPassRegistry()); - } - bool runOnMachineFunction(MachineFunction &MF) override; - bool runOnMachineBasicBlock(MachineBasicBlock &MBB); - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::IsSSA); - } - - // This pass modifies the program, but does not modify the CFG - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - StringRef getPassName() const override { return RISCV_CLEANUP_VSETVLI_NAME; } -}; - -} // end anonymous namespace - -char RISCVCleanupVSETVLI::ID = 0; - -INITIALIZE_PASS(RISCVCleanupVSETVLI, DEBUG_TYPE, - RISCV_CLEANUP_VSETVLI_NAME, false, false) - -static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) { - // If we don't have a previous VSET{I}VLI or the VL output isn't dead, we - // can't remove this VSETVLI. - if (!PrevVSETVLI || !MI.getOperand(0).isDead()) - return false; - - // Does this VSET{I}VLI use the same VTYPE immediate. 
- int64_t PrevVTYPEImm = PrevVSETVLI->getOperand(2).getImm(); - int64_t VTYPEImm = MI.getOperand(2).getImm(); - if (PrevVTYPEImm != VTYPEImm) - return false; - - if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { - // If the previous opcode wasn't vsetivli we can't compare them. - if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETIVLI) - return false; - - // For VSETIVLI, we can just compare the immediates. - return PrevVSETVLI->getOperand(1).getImm() == MI.getOperand(1).getImm(); - } - - assert(MI.getOpcode() == RISCV::PseudoVSETVLI); - Register AVLReg = MI.getOperand(1).getReg(); - Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg(); - - // If this VSETVLI isn't changing VL, it is redundant. - if (AVLReg == RISCV::X0 && MI.getOperand(0).getReg() == RISCV::X0) - return true; - - // If the previous VSET{I}VLI's output (which isn't X0) is fed into this - // VSETVLI, this one isn't changing VL so is redundant. - // Only perform this on virtual registers to avoid the complexity of having - // to work out if the physical register was clobbered somewhere in between. - if (AVLReg.isVirtual() && AVLReg == PrevOutVL) - return true; - - // If the previous opcode isn't vsetvli we can't do any more comparison. - if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETVLI) - return false; - - // Does this VSETVLI use the same AVL register? - if (AVLReg != PrevVSETVLI->getOperand(1).getReg()) - return false; - - // If the AVLReg is X0 we must be setting VL to VLMAX. Keeping VL unchanged - // was handled above. - if (AVLReg == RISCV::X0) { - // This instruction is setting VL to VLMAX, this is redundant if the - // previous VSETVLI was also setting VL to VLMAX. But it is not redundant - // if they were setting it to any other value or leaving VL unchanged. - return PrevOutVL != RISCV::X0; - } - - // This vsetvli is redundant. 
- return true; -} - -bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) { - bool Changed = false; - MachineInstr *PrevVSETVLI = nullptr; - - for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) { - MachineInstr &MI = *MII++; - - if (MI.getOpcode() != RISCV::PseudoVSETVLI && - MI.getOpcode() != RISCV::PseudoVSETIVLI) { - if (PrevVSETVLI && - (MI.isCall() || MI.modifiesRegister(RISCV::VL) || - MI.modifiesRegister(RISCV::VTYPE))) { - // Old VL/VTYPE is overwritten. - PrevVSETVLI = nullptr; - } - continue; - } - - if (isRedundantVSETVLI(MI, PrevVSETVLI)) { - // This VSETVLI is redundant, remove it. - MI.eraseFromParent(); - Changed = true; - } else { - // Otherwise update VSET{I}VLI for the next iteration. - PrevVSETVLI = &MI; - } - } - - return Changed; -} - -bool RISCVCleanupVSETVLI::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(MF.getFunction())) - return false; - - // Skip if the vector extension is not enabled. - const RISCVSubtarget &ST = MF.getSubtarget(); - if (!ST.hasStdExtV()) - return false; - - bool Changed = false; - - for (MachineBasicBlock &MBB : MF) - Changed |= runOnMachineBasicBlock(MBB); - - return Changed; -} - -/// Returns an instance of the Cleanup VSETVLI pass. 
-FunctionPass *llvm::createRISCVCleanupVSETVLIPass() { - return new RISCVCleanupVSETVLI(); -} diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -240,7 +240,8 @@ bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { - assert(MBBI->getNumOperands() == 5 && "Unexpected instruction format"); + assert(MBBI->getNumExplicitOperands() == 3 && MBBI->getNumOperands() >= 5 && + "Unexpected instruction format"); DebugLoc DL = MBBI->getDebugLoc(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -6457,107 +6457,9 @@ return TailMBB; } -static MachineInstr *elideCopies(MachineInstr *MI, - const MachineRegisterInfo &MRI) { - while (true) { - if (!MI->isFullCopy()) - return MI; - if (!Register::isVirtualRegister(MI->getOperand(1).getReg())) - return nullptr; - MI = MRI.getVRegDef(MI->getOperand(1).getReg()); - if (!MI) - return nullptr; - } -} - -static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, - int VLIndex, unsigned SEWIndex, - RISCVII::VLMUL VLMul, - bool ForceTailAgnostic) { - MachineFunction &MF = *BB->getParent(); - DebugLoc DL = MI.getDebugLoc(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - - unsigned Log2SEW = MI.getOperand(SEWIndex).getImm(); - unsigned SEW = 1 << Log2SEW; - assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); - - MachineRegisterInfo &MRI = MF.getRegInfo(); - - auto BuildVSETVLI = [&]() { - if (VLIndex >= 0) { - Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - const MachineOperand &VLOp = MI.getOperand(VLIndex); - - // VL can be a register or an immediate. 
- if (VLOp.isImm()) - return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI)) - .addReg(DestReg, RegState::Define | RegState::Dead) - .addImm(VLOp.getImm()); - - Register VLReg = MI.getOperand(VLIndex).getReg(); - return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) - .addReg(DestReg, RegState::Define | RegState::Dead) - .addReg(VLReg); - } - - // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). - return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill); - }; - - MachineInstrBuilder MIB = BuildVSETVLI(); - - // Default to tail agnostic unless the destination is tied to a source. In - // that case the user would have some control over the tail values. The tail - // policy is also ignored on instructions that only update element 0 like - // vmv.s.x or reductions so use agnostic there to match the common case. - // FIXME: This is conservatively correct, but we might want to detect that - // the input is undefined. - bool TailAgnostic = true; - unsigned UseOpIdx; - if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { - TailAgnostic = false; - // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. - const MachineOperand &UseMO = MI.getOperand(UseOpIdx); - MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); - if (UseMI) { - UseMI = elideCopies(UseMI, MRI); - if (UseMI && UseMI->isImplicitDef()) - TailAgnostic = true; - } - } - - // For simplicity we reuse the vtype representation here. 
- MIB.addImm(RISCVVType::encodeVTYPE(VLMul, SEW, - /*TailAgnostic*/ TailAgnostic, - /*MaskAgnostic*/ false)); - - // Remove (now) redundant operands from pseudo - if (VLIndex >= 0 && MI.getOperand(VLIndex).isReg()) { - MI.getOperand(VLIndex).setReg(RISCV::NoRegister); - MI.getOperand(VLIndex).setIsKill(false); - } - - return BB; -} - MachineBasicBlock * RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { - uint64_t TSFlags = MI.getDesc().TSFlags; - - if (RISCVII::hasSEWOp(TSFlags)) { - unsigned NumOperands = MI.getNumExplicitOperands(); - int VLIndex = RISCVII::hasVLOp(TSFlags) ? NumOperands - 2 : -1; - unsigned SEWIndex = NumOperands - 1; - bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags); - - RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags); - return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic); - } - switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -0,0 +1,389 @@ +//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function pass that inserts VSETVLI instructions where +// needed. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <tuple>
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-insert-vsetvli"
+#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"
+
+namespace {
+
+/// A VL/VTYPE configuration: an AVL (register or immediate) together with the
+/// decoded VTYPE fields. State records which AVL member of the union is
+/// active, or that the configuration is uninitialized or unknown.
+class VSETVLIInfo {
+  union {
+    Register AVLReg;
+    unsigned AVLImm;
+  };
+
+  enum : uint8_t {
+    Uninitialized,
+    AVLIsReg,
+    AVLIsImm,
+    Unknown,
+  } State = Uninitialized;
+
+  // Fields from VTYPE. SEW is set from `unsigned` values; a uint8_t member
+  // would silently wrap any SEW >= 256, so keep 16 bits.
+  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
+  uint16_t SEW = 0;
+  bool TailAgnostic = false;
+  bool MaskAgnostic = false;
+
+public:
+  VSETVLIInfo() : AVLImm(0) {}
+
+  bool isValid() const { return State != Uninitialized; }
+  void setUnknown() { State = Unknown; }
+  bool isUnknown() const { return State == Unknown; }
+
+  void setAVLReg(Register Reg) {
+    AVLReg = Reg;
+    State = AVLIsReg;
+  }
+
+  void setAVLImm(unsigned Imm) {
+    AVLImm = Imm;
+    State = AVLIsImm;
+  }
+
+  bool hasAVLImm() const { return State == AVLIsImm; }
+  bool hasAVLReg() const { return State == AVLIsReg; }
+  Register getAVLReg() const {
+    assert(hasAVLReg());
+    return AVLReg;
+  }
+  unsigned getAVLImm() const {
+    assert(hasAVLImm());
+    return AVLImm;
+  }
+
+  /// Returns true if both infos hold the same kind of AVL (register or
+  /// immediate) with the same value. Neither side may be unknown.
+  bool hasSameAVL(const VSETVLIInfo &Other) const {
+    assert(isValid() && Other.isValid() &&
+           "Can't compare invalid VSETVLIInfos");
+    assert(!isUnknown() && !Other.isUnknown() &&
+           "Can't compare AVL in unknown state");
+    if (hasAVLReg() && Other.hasAVLReg())
+      return getAVLReg() == Other.getAVLReg();
+
+    if (hasAVLImm() && Other.hasAVLImm())
+      return getAVLImm() == Other.getAVLImm();
+
+    return false;
+  }
+
+  /// Decodes an encoded VTYPE immediate into the individual fields.
+  void setVTYPE(unsigned VType) {
+    assert(isValid() && !isUnknown() &&
+           "Can't set VTYPE for uninitialized or unknown");
+    VLMul = RISCVVType::getVLMUL(VType);
+    SEW = RISCVVType::getSEW(VType);
+    TailAgnostic = RISCVVType::isTailAgnostic(VType);
+    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
+  }
+  /// Sets the VTYPE fields directly from already-decoded values.
+  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
+    assert(isValid() && !isUnknown() &&
+           "Can't set VTYPE for uninitialized or unknown");
+    VLMul = L;
+    SEW = S;
+    TailAgnostic = TA;
+    MaskAgnostic = MA;
+  }
+
+  unsigned encodeVTYPE() const {
+    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
+  }
+
+  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
+    assert(isValid() && Other.isValid() &&
+           "Can't compare invalid VSETVLIInfos");
+    assert(!isUnknown() && !Other.isUnknown() &&
+           "Can't compare VTYPE in unknown state");
+    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
+           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
+                    Other.MaskAgnostic);
+  }
+
+  /// Returns true if an instruction requiring \p Other can execute under this
+  /// configuration, i.e. no new vsetvli has to be inserted for it.
+  bool isCompatible(const VSETVLIInfo &Other) const {
+    assert(isValid() && Other.isValid() &&
+           "Can't compare invalid VSETVLIInfos");
+    // Nothing is compatible with Unknown.
+    if (isUnknown() || Other.isUnknown())
+      return false;
+
+    // If other doesn't need an AVLReg and the SEW matches, consider it
+    // compatible. (computeInfoForInstr uses NoRegister for pseudos that have
+    // no VL operand.)
+    if (Other.hasAVLReg() && Other.AVLReg == RISCV::NoRegister) {
+      if (SEW == Other.SEW)
+        return true;
+    }
+
+    // VTypes must match.
+    if (!hasSameVTYPE(Other))
+      return false;
+
+    if (hasAVLImm() != Other.hasAVLImm())
+      return false;
+
+    if (hasAVLImm())
+      return getAVLImm() == Other.getAVLImm();
+
+    return getAVLReg() == Other.getAVLReg();
+  }
+};
+
+class RISCVInsertVSETVLI : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+
+public:
+  static char ID;
+
+  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
+    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
+
+private:
+  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
+                     const VSETVLIInfo &Info);
+
+  bool emitVSETVLIs(MachineBasicBlock &MBB);
+};
+
+} // end anonymous namespace
+
+char RISCVInsertVSETVLI::ID = 0;
+
+INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
+                false, false)
+
+// Follow a chain of full copies back to the defining instruction. Returns
+// nullptr if the chain reaches a non-virtual source register or a register
+// without a definition.
+static MachineInstr *elideCopies(MachineInstr *MI,
+                                 const MachineRegisterInfo *MRI) {
+  while (true) {
+    if (!MI->isFullCopy())
+      return MI;
+    if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
+      return nullptr;
+    MI = MRI->getVRegDef(MI->getOperand(1).getReg());
+    if (!MI)
+      return nullptr;
+  }
+}
+
+// Compute the VL/VTYPE configuration required by the vector pseudo MI. The
+// last explicit operand holds log2(SEW); when the pseudo has a VL operand it
+// is the explicit operand just before that. LMUL comes from TSFlags.
+static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
+                                       const MachineRegisterInfo *MRI) {
+  VSETVLIInfo InstrInfo;
+  unsigned NumOperands = MI.getNumExplicitOperands();
+
+  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+
+  unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
+  unsigned SEW = 1 << Log2SEW;
+  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+
+  // Default to tail agnostic unless the destination is tied to a source
+  // operand, in which case the user would have some control over the tail
+  // values; a tied source that is undef (IMPLICIT_DEF) keeps tail agnostic.
+  // The tail policy is also ignored on instructions that only update
+  // element 0 like vmv.s.x or reductions so use agnostic there to match
+  // the common case.
+  // FIXME: This is conservatively correct, but we might want to detect that
+  // the input is undefined.
+  bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
+  bool TailAgnostic = true;
+  unsigned UseOpIdx;
+  if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+    TailAgnostic = false;
+    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
+    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
+    if (UseMI) {
+      UseMI = elideCopies(UseMI, MRI);
+      if (UseMI && UseMI->isImplicitDef())
+        TailAgnostic = true;
+    }
+  }
+
+  if (RISCVII::hasVLOp(TSFlags)) {
+    const MachineOperand &VLOp = MI.getOperand(NumOperands - 2);
+    if (VLOp.isImm())
+      InstrInfo.setAVLImm(VLOp.getImm());
+    else
+      InstrInfo.setAVLReg(VLOp.getReg());
+  } else
+    InstrInfo.setAVLReg(RISCV::NoRegister);
+  InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
+                     /*MaskAgnostic*/ false);
+
+  return InstrInfo;
+}
+
+void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
+                                       const VSETVLIInfo &Info) {
+  DebugLoc DL = MI.getDebugLoc();
+
+  if (Info.hasAVLImm()) {
+    // TODO: Use X0 as the destination.
+    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI))
+        .addReg(DestReg, RegState::Define | RegState::Dead)
+        .addImm(Info.getAVLImm())
+        .addImm(Info.encodeVTYPE());
+    return;
+  }
+
+  Register AVLReg = Info.getAVLReg();
+  if (AVLReg == RISCV::NoRegister) {
+    // No AVL requested: use the rd = X0, rs1 = X0 form, with an implicit use
+    // of VL added to the instruction.
+    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
+        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+        .addReg(RISCV::X0, RegState::Kill)
+        .addImm(Info.encodeVTYPE())
+        .addReg(RISCV::VL, RegState::Implicit);
+    return;
+  }
+
+  Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+  BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
+      .addReg(DestReg, RegState::Define | RegState::Dead)
+      .addReg(AVLReg)
+      .addImm(Info.encodeVTYPE());
+}
+
+// Return a VSETVLIInfo representing the changes made by this VSETVLI or
+// VSETIVLI instruction. Operand 1 is the AVL (register or immediate) and
+// operand 2 the encoded VTYPE immediate. File-local helper, hence static.
+static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
+  VSETVLIInfo NewInfo;
+  if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
+    Register AVLReg = MI.getOperand(1).getReg();
+    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
+           "Can't handle X0, X0 vsetvli yet");
+    NewInfo.setAVLReg(AVLReg);
+  } else {
+    assert(MI.getOpcode() == RISCV::PseudoVSETIVLI);
+    NewInfo.setAVLImm(MI.getOperand(1).getImm());
+  }
+  NewInfo.setVTYPE(MI.getOperand(2).getImm());
+
+  return NewInfo;
+}
+
+bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
+  bool MadeChange = false;
+
+  // Assume predecessor state is unknown.
+  VSETVLIInfo CurInfo;
+  CurInfo.setUnknown();
+
+  for (MachineInstr &MI : MBB) {
+    // If this is an explicit VSETVLI or VSETIVLI, update our state.
+    if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+        MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+      // Conservatively, mark the VL and VTYPE as live.
+      assert(MI.getOperand(3).getReg() == RISCV::VL &&
+             MI.getOperand(4).getReg() == RISCV::VTYPE &&
+             "Unexpected operands where VL and VTYPE should be");
+      MI.getOperand(3).setIsDead(false);
+      MI.getOperand(4).setIsDead(false);
+      MadeChange = true;
+      CurInfo = getInfoForVSETVLI(MI);
+      continue;
+    }
+
+    uint64_t TSFlags = MI.getDesc().TSFlags;
+    if (RISCVII::hasSEWOp(TSFlags)) {
+      VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+      if (RISCVII::hasVLOp(TSFlags)) {
+        MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2);
+        if (VLOp.isReg()) {
+          // Erase the AVL operand; NewInfo already recorded the register.
+          VLOp.setReg(RISCV::NoRegister);
+          VLOp.setIsKill(false);
+        }
+        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
+                                                /*isImp*/ true));
+      }
+      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
+                                              /*isImp*/ true));
+
+      bool NeedVSETVLI = true;
+      if (CurInfo.isValid() && CurInfo.isCompatible(NewInfo))
+        NeedVSETVLI = false;
+
+      // CurInfo was not compatible. If our AVL is a virtual register, it may
+      // be the VL result of a VSET(I)VLI. When that defining instruction has
+      // the same AVL and VTYPE as the last state we observed, VL/VTYPE are
+      // already what this instruction needs, so no new VSETVLI is required.
+      if (NeedVSETVLI && !CurInfo.isUnknown() && NewInfo.hasAVLReg() &&
+          NewInfo.getAVLReg().isVirtual() && NewInfo.hasSameVTYPE(CurInfo)) {
+        if (MachineInstr *DefMI = MRI->getVRegDef(NewInfo.getAVLReg())) {
+          if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
+              DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
+            VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+            if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
+              NeedVSETVLI = false;
+          }
+        }
+      }
+
+      // Nothing in effect satisfies this instruction: insert a VSETVLI
+      // for it and track the new state.
+      if (NeedVSETVLI) {
+        insertVSETVLI(MBB, MI, NewInfo);
+        CurInfo = NewInfo;
+      }
+
+      // Implicit operands were added above, so the function was modified.
+      MadeChange = true;
+    }
+    // If this is something that updates VL/VTYPE that we don't know about,
+    // set the state to unknown.
+    if (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
+        MI.modifiesRegister(RISCV::VTYPE)) {
+      VSETVLIInfo NewInfo;
+      NewInfo.setUnknown();
+      CurInfo = NewInfo;
+    }
+  }
+
+  return MadeChange;
+}
+
+bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
+  // Skip if the vector extension is not enabled.
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  if (!ST.hasStdExtV())
+    return false;
+
+  TII = ST.getInstrInfo();
+  MRI = &MF.getRegInfo();
+
+  bool Changed = false;
+
+  // Each block is processed independently; emitVSETVLIs starts every block
+  // from the unknown state.
+  for (MachineBasicBlock &MBB : MF)
+    Changed |= emitVSETVLIs(MBB);
+
+  return Changed;
+}
+
+/// Returns an instance of the Insert VSETVLI pass.
+FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
+  return new RISCVInsertVSETVLI();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -619,8 +619,6 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -637,9 +635,7 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = "$rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -654,8 +650,6 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -672,9 +666,7 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = "$rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -690,8 +682,6 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let 
usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -710,9 +700,7 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $merge", "$rd = $merge"); - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -727,8 +715,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -743,8 +729,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -758,8 +742,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -774,8 +756,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -790,8 +770,6 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -804,8 +782,6 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -819,9 +795,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints ="$rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -836,8 +810,6 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp 
= 1; // BaseInstr is not used in RISCVExpandPseudoInsts pass. @@ -853,9 +825,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Constraint; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -870,9 +840,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -887,8 +855,6 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -906,9 +872,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = "@earlyclobber $rd, $rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -925,9 +889,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Constraint; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -943,8 +905,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -960,8 +920,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -979,9 +937,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1001,9 +957,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let 
Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1025,9 +979,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Constraint; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 0; @@ -1047,9 +999,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1068,9 +1018,7 @@ let mayLoad = 1; let mayStore = 1; let hasSideEffects = 1; - let usesCustomInserter = 1; let Constraints = "$vd_wd = $vd"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1088,9 +1036,7 @@ let mayLoad = 1; let mayStore = 1; let hasSideEffects = 1; - let usesCustomInserter = 1; let Constraints = "$vd_wd = $vd"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -1131,8 +1077,6 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1148,9 +1092,7 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = "$rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1166,8 +1108,6 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1183,9 +1123,7 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = "$rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1201,11 +1139,9 @@ let mayLoad = 1; let mayStore = 0; let 
hasSideEffects = 0; - let usesCustomInserter = 1; // For vector indexed segment loads, the destination vector register groups // cannot overlap the source vector register group let Constraints = "@earlyclobber $rd"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1222,11 +1158,9 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; // For vector indexed segment loads, the destination vector register groups // cannot overlap the source vector register group let Constraints = "@earlyclobber $rd, $rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1241,8 +1175,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1258,8 +1190,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -1273,8 +1203,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1290,8 +1218,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -1307,8 +1233,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1325,8 +1249,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -3521,7 +3443,7 @@ 
//===----------------------------------------------------------------------===// // 13.2. Vector Single-Width Averaging Add and Subtract //===----------------------------------------------------------------------===// -let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in { +let Uses = [VXRM], hasSideEffects = 1 in { defm PseudoVAADDU : VPseudoBinaryV_VV_VX; defm PseudoVAADD : VPseudoBinaryV_VV_VX; defm PseudoVASUBU : VPseudoBinaryV_VV_VX; @@ -3531,14 +3453,14 @@ //===----------------------------------------------------------------------===// // 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation //===----------------------------------------------------------------------===// -let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in { +let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in { defm PseudoVSMUL : VPseudoBinaryV_VV_VX; } //===----------------------------------------------------------------------===// // 13.4. Vector Single-Width Scaling Shift Instructions //===----------------------------------------------------------------------===// -let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in { +let Uses = [VXRM], hasSideEffects = 1 in { defm PseudoVSSRL : VPseudoBinaryV_VV_VX_VI; defm PseudoVSSRA : VPseudoBinaryV_VV_VX_VI; } @@ -3546,7 +3468,7 @@ //===----------------------------------------------------------------------===// // 13.5. 
Vector Narrowing Fixed-Point Clip Instructions //===----------------------------------------------------------------------===// -let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in { +let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in { defm PseudoVNCLIP : VPseudoBinaryV_WV_WX_WI; defm PseudoVNCLIPU : VPseudoBinaryV_WV_WX_WI; } @@ -3792,8 +3714,7 @@ //===----------------------------------------------------------------------===// let Predicates = [HasStdExtV] in { -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1, - Uses = [VL, VTYPE] in { +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { foreach m = MxList.m in { let VLMul = m.value in { let HasSEWOp = 1, BaseInstr = VMV_X_S in @@ -3816,8 +3737,7 @@ //===----------------------------------------------------------------------===// let Predicates = [HasStdExtV, HasStdExtF] in { -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1, - Uses = [VL, VTYPE] in { +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { foreach m = MxList.m in { foreach f = FPList.fpinfo in { let VLMul = m.value in { diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -39,7 +39,7 @@ initializeGlobalISel(*PR); initializeRISCVMergeBaseOffsetOptPass(*PR); initializeRISCVExpandPseudoPass(*PR); - initializeRISCVCleanupVSETVLIPass(*PR); + initializeRISCVInsertVSETVLIPass(*PR); } static StringRef computeDataLayout(const Triple &TT) { @@ -191,8 +191,7 @@ } void RISCVPassConfig::addPreRegAlloc() { - if (TM->getOptLevel() != CodeGenOpt::None) { + if (TM->getOptLevel() != CodeGenOpt::None) addPass(createRISCVMergeBaseOffsetOptPass()); - addPass(createRISCVCleanupVSETVLIPass()); - } + addPass(createRISCVInsertVSETVLIPass()); } diff --git a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-gpr.mir 
b/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-gpr.mir deleted file mode 100644 --- a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-gpr.mir +++ /dev/null @@ -1,56 +0,0 @@ -# RUN: llc -mtriple riscv64 -mattr=+experimental-v %s \ -# RUN: -start-before=finalize-isel -stop-after=finalize-isel -o - \ -# RUN: | FileCheck --check-prefix=POST-INSERTER %s - -# RUN: llc -mtriple riscv64 -mattr=+experimental-v %s \ -# RUN: -start-before=finalize-isel -o - \ -# RUN: | FileCheck --check-prefix=CODEGEN %s - ---- | - define void @vadd_vint64m1( - *%pc, - *%pa, - *%pb, - i64 %vl) - { - ret void - } -... ---- -name: vadd_vint64m1 -tracksRegLiveness: true -body: | - bb.0 (%ir-block.0): - liveins: $x10, $x11, $x12, $x13 - - %3:gpr = COPY $x13 - %2:gpr = COPY $x12 - %1:gpr = COPY $x11 - %0:gpr = COPY $x10 - %4:vr = PseudoVLE64_V_M1 %1, %3, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) - %5:vr = PseudoVLE64_V_M1 %2, %3, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pb, align 8) - %6:vr = PseudoVADD_VV_M1 killed %4, killed %5, %3, 6, implicit $vl, implicit $vtype - PseudoVSE64_V_M1 killed %6, %0, %3, 6, implicit $vl, implicit $vtype :: (store unknown-size into %ir.pc, align 8) - PseudoRET - -... 
- -# POST-INSERTER: %0:gpr = COPY $x13 -# POST-INSERTER: %1:gpr = COPY $x12 -# POST-INSERTER: %2:gpr = COPY $x11 -# POST-INSERTER: %3:gpr = COPY $x10 -# POST-INSERTER: dead %7:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype -# POST-INSERTER: %4:vr = PseudoVLE64_V_M1 %2, $noreg, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) -# POST-INSERTER: dead %8:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype -# POST-INSERTER: %5:vr = PseudoVLE64_V_M1 %1, $noreg, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pb, align 8) -# POST-INSERTER: dead %9:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype -# POST-INSERTER: %6:vr = PseudoVADD_VV_M1 killed %4, killed %5, $noreg, 6, implicit $vl, implicit $vtype -# POST-INSERTER: dead %10:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype -# POST-INSERTER: PseudoVSE64_V_M1 killed %6, %3, $noreg, 6, implicit $vl, implicit $vtype :: (store unknown-size into %ir.pc, align 8) - -# CODEGEN: vsetvli a3, a3, e64,m1,ta,mu -# CODEGEN-NEXT: vle64.v v25, (a1) -# CODEGEN-NEXT: vle64.v v26, (a2) -# CODEGEN-NEXT: vadd.vv v25, v25, v26 -# CODEGEN-NEXT: vse64.v v25, (a0) -# CODEGEN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll b/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll deleted file mode 100644 --- a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll +++ /dev/null @@ -1,32 +0,0 @@ -; This test shows the evolution of RVV pseudo instructions within isel. 
- -; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o %t.pre.mir \ -; RUN: -stop-before=finalize-isel -; RUN: cat %t.pre.mir | FileCheck --check-prefix=PRE-INSERTER %s - -; RUN: llc -mtriple riscv64 -mattr=+experimental-v %t.pre.mir -o %t.post.mir \ -; RUN: -start-before=finalize-isel -stop-after=finalize-isel -; RUN: cat %t.post.mir | FileCheck --check-prefix=POST-INSERTER %s - -define void @vadd_vint64m1( - *%pc, - *%pa, - *%pb) -{ - %va = load , * %pa - %vb = load , * %pb - %vc = add %va, %vb - store %vc, *%pc - ret void -} - -; PRE-INSERTER: %3:vr = VL1RE64_V %1 :: (load unknown-size from %ir.pa, align 8) -; PRE-INSERTER: %4:vr = VL1RE64_V %2 :: (load unknown-size from %ir.pb, align 8) -; PRE-INSERTER: %5:vr = PseudoVADD_VV_M1 killed %3, killed %4, $x0, 6, implicit $vl, implicit $vtype -; PRE-INSERTER: VS1R_V killed %5, %0 :: (store unknown-size into %ir.pc, align 8) - -; POST-INSERTER: %3:vr = VL1RE64_V %1 :: (load unknown-size from %ir.pa, align 8) -; POST-INSERTER: %4:vr = VL1RE64_V %2 :: (load unknown-size from %ir.pb, align 8) -; POST-INSERTER: dead %6:gpr = PseudoVSETVLI $x0, 88, implicit-def $vl, implicit-def $vtype -; POST-INSERTER: %5:vr = PseudoVADD_VV_M1 killed %3, killed %4, $noreg, 6, implicit $vl, implicit $vtype -; POST-INSERTER: VS1R_V killed %5, %0 :: (store unknown-size into %ir.pc, align 8) diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir --- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir +++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=riscv64 -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s +# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s --- | define void @add_scalable_offset( @@ -55,7 +55,7 @@ ; CHECK: PseudoRET %1:gpr = COPY $x11 %0:gpr = COPY $x10 - %2:vr = PseudoVLE64_V_M1 
%0, %1, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) + %2:vr = PseudoVLE64_V_M1 %0, %1, 6 :: (load unknown-size from %ir.pa, align 8) %3:gpr = ADDI %stack.2, 0 VS1R_V killed %2:vr, %3:gpr PseudoRET diff --git a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir deleted file mode 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir +++ /dev/null @@ -1,46 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s - -# Make sure we don't combine these VSET{I}VLIs in the cleanup pass. We could not -# differentiate AVL values if the opcode of the previous one is different from -# current one. - ---- | - ; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetivli.ll' - source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetivli.ll" - target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" - target triple = "riscv64" - - define void @cleanup_vsetivli() #0 { - ret void - } - - attributes #0 = { "target-features"="+experimental-v" } - -... 
---- -name: cleanup_vsetivli -alignment: 4 -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } -frameInfo: - maxAlignment: 1 -machineFunctionInfo: {} -body: | - bb.0 (%ir-block.0): - ; CHECK-LABEL: name: cleanup_vsetivli - ; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: dead %1:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: dead %3:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: dead %5:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: PseudoRET - dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - dead %1:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype - dead %2:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype - dead %3:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - dead %4:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - dead %5:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype - PseudoRET - -... diff --git a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir deleted file mode 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir +++ /dev/null @@ -1,79 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s - ---- | - ; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll' - source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll" - target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" - target triple = "riscv64" - - define void @cleanup_vsetvli0() #0 { - ret void - } - - define void @cleanup_vsetvli1() #0 { - ret void - } - - attributes #0 = { "target-features"="+experimental-v" } - -... ---- -# Make sure we don't combine these two VSETVLIs in the cleanup pass. 
The first -# keeps the previous value of VL, the second sets it to VLMAX. We can't remove -# the first since we can't tell if this is a change of VL. -name: cleanup_vsetvli0 -alignment: 4 -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } -frameInfo: - maxAlignment: 1 -machineFunctionInfo: {} -body: | - bb.0 (%ir-block.0): - ; CHECK-LABEL: name: cleanup_vsetvli0 - ; CHECK: dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: PseudoRET - dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - PseudoRET - -... ---- -# 1. Ensure we can remove the second VSETVLI which takes its AVL from the first VSETVLI. -# 2. Ensure we can remove the fourth VSETVLI which takes its AVL from the VSETIVLI. -# 3. Make sure we don't combine the latter two VSETVLIs; the first outputs to a -# physical register which is clobbered by a later instruction. 
-name: cleanup_vsetvli1 -alignment: 4 -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } -frameInfo: - maxAlignment: 1 -machineFunctionInfo: {} -body: | - bb.0 (%ir-block.0): - liveins: $x3 - ; CHECK-LABEL: name: cleanup_vsetvli1 - ; CHECK: liveins: $x3 - ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: [[PseudoVSETIVLI:%[0-9]+]]:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: $x1 = COPY $x3 - ; CHECK: dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: PseudoRET - %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - dead %1:gpr = PseudoVSETVLI %0, 12, implicit-def $vl, implicit-def $vtype - - %2:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype - dead %3:gpr = PseudoVSETVLI %2, 12, implicit-def $vl, implicit-def $vtype - - $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - $x1 = COPY $x3 - dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype - PseudoRET - -... 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -3667,11 +3667,12 @@ ; LMULMAX2-RV32-NEXT: addi a3, a1, 819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a1, 4112 -; LMULMAX2-RV32-NEXT: addi a2, a1, 257 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX2-RV32-NEXT: lui a2, 4112 +; LMULMAX2-RV32-NEXT: addi a2, a2, 257 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX2-RV32-NEXT: # %bb.1: +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 @@ -3726,12 +3727,13 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_5 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -3756,8 +3758,8 @@ ; LMULMAX2-RV32-NEXT: addi a1, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB3_6 ; LMULMAX2-RV32-NEXT: .LBB3_5: -; LMULMAX2-RV32-NEXT: srli a5, a1, 1 -; LMULMAX2-RV32-NEXT: or a1, a1, a5 +; LMULMAX2-RV32-NEXT: srli a1, a5, 1 +; LMULMAX2-RV32-NEXT: or a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -3900,11 +3902,12 @@ ; 
LMULMAX1-RV32-NEXT: addi a3, a1, 819 ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi a7, a1, -241 -; LMULMAX1-RV32-NEXT: lui a1, 4112 -; LMULMAX1-RV32-NEXT: addi a2, a1, 257 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: lui a2, 4112 +; LMULMAX1-RV32-NEXT: addi a2, a2, 257 ; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX1-RV32-NEXT: # %bb.1: +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 2 @@ -3959,12 +3962,13 @@ ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25 -; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_5 +; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26 +; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: srli a1, a5, 1 -; LMULMAX1-RV32-NEXT: or a1, a5, a1 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: srli a5, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 4 @@ -3989,8 +3993,8 @@ ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB3_6 ; LMULMAX1-RV32-NEXT: .LBB3_5: -; LMULMAX1-RV32-NEXT: srli a5, a1, 1 -; LMULMAX1-RV32-NEXT: or a1, a1, a5 +; LMULMAX1-RV32-NEXT: srli a1, a5, 1 +; LMULMAX1-RV32-NEXT: or a1, a5, a1 ; LMULMAX1-RV32-NEXT: srli a5, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 4 @@ -11120,11 +11124,12 @@ ; LMULMAX2-RV32-NEXT: addi a3, a1, 819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a1, 4112 -; LMULMAX2-RV32-NEXT: addi a2, a1, 257 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: lui a2, 
4112 +; LMULMAX2-RV32-NEXT: addi a2, a2, 257 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2 ; LMULMAX2-RV32-NEXT: # %bb.1: +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 @@ -11179,12 +11184,13 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 3 ; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_5 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11209,8 +11215,8 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_6 ; LMULMAX2-RV32-NEXT: .LBB7_5: -; LMULMAX2-RV32-NEXT: srli a5, a1, 1 -; LMULMAX2-RV32-NEXT: or a1, a1, a5 +; LMULMAX2-RV32-NEXT: srli a1, a5, 1 +; LMULMAX2-RV32-NEXT: or a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11237,12 +11243,13 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2 ; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_8 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8 ; LMULMAX2-RV32-NEXT: # %bb.7: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; 
LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11267,8 +11274,8 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_9 ; LMULMAX2-RV32-NEXT: .LBB7_8: -; LMULMAX2-RV32-NEXT: srli a5, a1, 1 -; LMULMAX2-RV32-NEXT: or a1, a1, a5 +; LMULMAX2-RV32-NEXT: srli a1, a5, 1 +; LMULMAX2-RV32-NEXT: or a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11295,12 +11302,13 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1 ; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_11 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11 ; LMULMAX2-RV32-NEXT: # %bb.10: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11325,8 +11333,8 @@ ; LMULMAX2-RV32-NEXT: addi a1, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_12 ; LMULMAX2-RV32-NEXT: .LBB7_11: -; LMULMAX2-RV32-NEXT: srli a5, a1, 1 -; LMULMAX2-RV32-NEXT: or a1, a1, a5 +; LMULMAX2-RV32-NEXT: srli a1, a5, 1 +; LMULMAX2-RV32-NEXT: or a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11544,13 +11552,14 @@ ; LMULMAX1-RV32-NEXT: addi a4, a2, 819 ; LMULMAX1-RV32-NEXT: lui a2, 61681 ; LMULMAX1-RV32-NEXT: addi t0, a2, -241 -; LMULMAX1-RV32-NEXT: lui a2, 4112 -; LMULMAX1-RV32-NEXT: addi a3, a2, 257 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, 
v26 +; LMULMAX1-RV32-NEXT: lui a3, 4112 +; LMULMAX1-RV32-NEXT: addi a3, a3, 257 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2 ; LMULMAX1-RV32-NEXT: # %bb.1: -; LMULMAX1-RV32-NEXT: srli a1, a2, 1 -; LMULMAX1-RV32-NEXT: or a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: srli a2, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 @@ -11604,11 +11613,12 @@ ; LMULMAX1-RV32-NEXT: vslidedown.vi v26, v26, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: srli a1, a2, 1 -; LMULMAX1-RV32-NEXT: or a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: srli a2, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 @@ -11663,11 +11673,12 @@ ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX1-RV32-NEXT: # %bb.7: -; LMULMAX1-RV32-NEXT: srli a1, a2, 1 -; LMULMAX1-RV32-NEXT: or a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: srli a2, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 @@ -11721,11 +11732,12 @@ ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11 ; 
LMULMAX1-RV32-NEXT: # %bb.10: -; LMULMAX1-RV32-NEXT: srli a1, a2, 1 -; LMULMAX1-RV32-NEXT: or a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: srli a2, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -2538,9 +2538,6 @@ ; LMULMAX2-RV32-NEXT: sw zero, 12(sp) ; LMULMAX2-RV32-NEXT: sw zero, 4(sp) ; LMULMAX2-RV32-NEXT: addi a6, zero, 32 -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a4, a1, 1365 ; LMULMAX2-RV32-NEXT: lui a1, 209715 @@ -2548,13 +2545,16 @@ ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 ; LMULMAX2-RV32-NEXT: lui a2, 4112 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25 ; LMULMAX2-RV32-NEXT: addi a2, a2, 257 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_2 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX2-RV32-NEXT: # %bb.1: -; LMULMAX2-RV32-NEXT: addi a1, a5, -1 -; LMULMAX2-RV32-NEXT: not a5, a5 -; LMULMAX2-RV32-NEXT: and a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: addi a5, a1, -1 +; LMULMAX2-RV32-NEXT: not a1, a1 +; LMULMAX2-RV32-NEXT: and a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a5, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5 @@ -2570,9 +2570,9 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB3_3 ; LMULMAX2-RV32-NEXT: .LBB3_2: -; LMULMAX2-RV32-NEXT: addi a5, a1, -1 -; 
LMULMAX2-RV32-NEXT: not a1, a1 -; LMULMAX2-RV32-NEXT: and a1, a1, a5 +; LMULMAX2-RV32-NEXT: addi a1, a5, -1 +; LMULMAX2-RV32-NEXT: not a5, a5 +; LMULMAX2-RV32-NEXT: and a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a5, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5 @@ -2590,10 +2590,11 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25 -; LMULMAX2-RV32-NEXT: vsrl.vx v25, v25, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX2-RV32-NEXT: # %bb.4: +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v25, v25, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 ; LMULMAX2-RV32-NEXT: not a1, a1 ; LMULMAX2-RV32-NEXT: and a1, a1, a5 @@ -2719,9 +2720,6 @@ ; LMULMAX1-RV32-NEXT: sw zero, 12(sp) ; LMULMAX1-RV32-NEXT: sw zero, 4(sp) ; LMULMAX1-RV32-NEXT: addi a6, zero, 32 -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26 ; LMULMAX1-RV32-NEXT: lui a1, 349525 ; LMULMAX1-RV32-NEXT: addi a4, a1, 1365 ; LMULMAX1-RV32-NEXT: lui a1, 209715 @@ -2729,13 +2727,16 @@ ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi a7, a1, -241 ; LMULMAX1-RV32-NEXT: lui a2, 4112 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25 ; LMULMAX1-RV32-NEXT: addi a2, a2, 257 -; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_2 +; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX1-RV32-NEXT: # %bb.1: -; LMULMAX1-RV32-NEXT: addi a1, a5, -1 -; LMULMAX1-RV32-NEXT: not a5, a5 -; LMULMAX1-RV32-NEXT: and a1, a5, a1 +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: addi a5, a1, -1 +; LMULMAX1-RV32-NEXT: not a1, a1 +; LMULMAX1-RV32-NEXT: and a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1 ; 
LMULMAX1-RV32-NEXT: and a5, a5, a4 ; LMULMAX1-RV32-NEXT: sub a1, a1, a5 @@ -2751,9 +2752,9 @@ ; LMULMAX1-RV32-NEXT: addi a5, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB3_3 ; LMULMAX1-RV32-NEXT: .LBB3_2: -; LMULMAX1-RV32-NEXT: addi a5, a1, -1 -; LMULMAX1-RV32-NEXT: not a1, a1 -; LMULMAX1-RV32-NEXT: and a1, a1, a5 +; LMULMAX1-RV32-NEXT: addi a1, a5, -1 +; LMULMAX1-RV32-NEXT: not a5, a5 +; LMULMAX1-RV32-NEXT: and a1, a5, a1 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1 ; LMULMAX1-RV32-NEXT: and a5, a5, a4 ; LMULMAX1-RV32-NEXT: sub a1, a1, a5 @@ -2771,10 +2772,11 @@ ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25 -; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX1-RV32-NEXT: # %bb.4: +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: addi a5, a1, -1 ; LMULMAX1-RV32-NEXT: not a1, a1 ; LMULMAX1-RV32-NEXT: and a1, a1, a5 @@ -7647,9 +7649,6 @@ ; LMULMAX2-RV32-NEXT: sw zero, 12(sp) ; LMULMAX2-RV32-NEXT: sw zero, 4(sp) ; LMULMAX2-RV32-NEXT: addi a6, zero, 32 -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a4, a1, 1365 ; LMULMAX2-RV32-NEXT: lui a1, 209715 @@ -7657,13 +7656,16 @@ ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 ; LMULMAX2-RV32-NEXT: lui a2, 4112 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 ; LMULMAX2-RV32-NEXT: addi a2, a2, 257 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_2 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2 ; LMULMAX2-RV32-NEXT: # %bb.1: -; LMULMAX2-RV32-NEXT: addi a1, a5, -1 -; LMULMAX2-RV32-NEXT: not a5, a5 -; LMULMAX2-RV32-NEXT: and a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; 
LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 +; LMULMAX2-RV32-NEXT: addi a5, a1, -1 +; LMULMAX2-RV32-NEXT: not a1, a1 +; LMULMAX2-RV32-NEXT: and a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a5, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5 @@ -7679,9 +7681,9 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_3 ; LMULMAX2-RV32-NEXT: .LBB7_2: -; LMULMAX2-RV32-NEXT: addi a5, a1, -1 -; LMULMAX2-RV32-NEXT: not a1, a1 -; LMULMAX2-RV32-NEXT: and a1, a1, a5 +; LMULMAX2-RV32-NEXT: addi a1, a5, -1 +; LMULMAX2-RV32-NEXT: not a5, a5 +; LMULMAX2-RV32-NEXT: and a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a5, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5 @@ -7699,10 +7701,11 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 3 ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5 ; LMULMAX2-RV32-NEXT: # %bb.4: +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 ; LMULMAX2-RV32-NEXT: not a1, a1 ; LMULMAX2-RV32-NEXT: and a1, a1, a5 @@ -7741,10 +7744,11 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2 ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8 ; LMULMAX2-RV32-NEXT: # %bb.7: +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 ; LMULMAX2-RV32-NEXT: not a1, a1 ; LMULMAX2-RV32-NEXT: and a1, a1, a5 @@ -7783,10 +7787,11 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1 ; 
LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX2-RV32-NEXT: vsrl.vx v26, v26, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11 ; LMULMAX2-RV32-NEXT: # %bb.10: +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v26, v26, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 ; LMULMAX2-RV32-NEXT: not a1, a1 ; LMULMAX2-RV32-NEXT: and a1, a1, a5 @@ -7962,25 +7967,25 @@ ; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) -; LMULMAX1-RV32-NEXT: addi a6, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v26, (a6) +; LMULMAX1-RV32-NEXT: addi a7, a0, 16 +; LMULMAX1-RV32-NEXT: vle64.v v26, (a7) ; LMULMAX1-RV32-NEXT: sw zero, 28(sp) ; LMULMAX1-RV32-NEXT: sw zero, 20(sp) -; LMULMAX1-RV32-NEXT: addi a7, zero, 32 -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27 -; LMULMAX1-RV32-NEXT: lui a2, 349525 -; LMULMAX1-RV32-NEXT: addi a5, a2, 1365 -; LMULMAX1-RV32-NEXT: lui a2, 209715 -; LMULMAX1-RV32-NEXT: addi a4, a2, 819 -; LMULMAX1-RV32-NEXT: lui a2, 61681 -; LMULMAX1-RV32-NEXT: addi t0, a2, -241 +; LMULMAX1-RV32-NEXT: addi a6, zero, 32 +; LMULMAX1-RV32-NEXT: lui a1, 349525 +; LMULMAX1-RV32-NEXT: addi a5, a1, 1365 +; LMULMAX1-RV32-NEXT: lui a1, 209715 +; LMULMAX1-RV32-NEXT: addi a4, a1, 819 +; LMULMAX1-RV32-NEXT: lui a1, 61681 +; LMULMAX1-RV32-NEXT: addi t0, a1, -241 ; LMULMAX1-RV32-NEXT: lui a3, 4112 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: addi a3, a3, 257 -; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_2 +; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2 ; LMULMAX1-RV32-NEXT: # %bb.1: +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27 ; LMULMAX1-RV32-NEXT: addi a2, a1, -1 ; LMULMAX1-RV32-NEXT: not a1, a1 ; 
LMULMAX1-RV32-NEXT: and a1, a1, a2 @@ -7999,9 +8004,9 @@ ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB7_3 ; LMULMAX1-RV32-NEXT: .LBB7_2: -; LMULMAX1-RV32-NEXT: addi a1, a2, -1 -; LMULMAX1-RV32-NEXT: not a2, a2 -; LMULMAX1-RV32-NEXT: and a1, a2, a1 +; LMULMAX1-RV32-NEXT: addi a2, a1, -1 +; LMULMAX1-RV32-NEXT: not a1, a1 +; LMULMAX1-RV32-NEXT: and a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: and a2, a2, a5 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2 @@ -8019,13 +8024,14 @@ ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v26, v26, 1 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: vsrl.vx v26, v26, a7 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: addi a1, a2, -1 -; LMULMAX1-RV32-NEXT: not a2, a2 -; LMULMAX1-RV32-NEXT: and a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v26, v26, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: addi a2, a1, -1 +; LMULMAX1-RV32-NEXT: not a1, a1 +; LMULMAX1-RV32-NEXT: and a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: and a2, a2, a5 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2 @@ -8060,15 +8066,16 @@ ; LMULMAX1-RV32-NEXT: sw a1, 24(sp) ; LMULMAX1-RV32-NEXT: sw zero, 12(sp) ; LMULMAX1-RV32-NEXT: sw zero, 4(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX1-RV32-NEXT: # %bb.7: -; LMULMAX1-RV32-NEXT: addi a1, a2, -1 -; LMULMAX1-RV32-NEXT: not a2, a2 -; LMULMAX1-RV32-NEXT: and a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: addi a2, a1, -1 +; 
LMULMAX1-RV32-NEXT: not a1, a1 +; LMULMAX1-RV32-NEXT: and a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: and a2, a2, a5 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2 @@ -8104,13 +8111,14 @@ ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 -; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a7 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11 ; LMULMAX1-RV32-NEXT: # %bb.10: -; LMULMAX1-RV32-NEXT: addi a1, a2, -1 -; LMULMAX1-RV32-NEXT: not a2, a2 -; LMULMAX1-RV32-NEXT: and a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: addi a2, a1, -1 +; LMULMAX1-RV32-NEXT: not a1, a1 +; LMULMAX1-RV32-NEXT: and a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: and a2, a2, a5 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2 @@ -8149,7 +8157,7 @@ ; LMULMAX1-RV32-NEXT: vle32.v v26, (a1) ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v26, (a6) +; LMULMAX1-RV32-NEXT: vse64.v v26, (a7) ; LMULMAX1-RV32-NEXT: addi sp, sp, 32 ; LMULMAX1-RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -26,7 +26,6 @@ ; CHECK-NEXT: vfmv.v.f v26, fa0 ; CHECK-NEXT: vsetivli a0, 1, e16,mf4,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <1 x half>, <1 x half>* %x @@ -45,7 +44,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s 
ft0, v25 ; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -63,7 +61,6 @@ ; CHECK-NEXT: vfmv.v.f v26, fa0 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <2 x half>, <2 x half>* %x @@ -82,7 +79,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -100,7 +96,6 @@ ; CHECK-NEXT: vfmv.v.f v26, fa0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x half>, <4 x half>* %x @@ -154,7 +149,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -172,7 +166,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <16 x half>, <16 x half>* %x @@ -192,7 +185,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -211,7 +203,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <32 x half>, <32 x half>* %x @@ -231,7 +222,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; 
CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -250,7 +240,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <64 x half>, <64 x half>* %x @@ -273,7 +262,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -294,13 +282,11 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v16, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: vsetvli a0, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, a2, e16,m8,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <128 x half>, <128 x half>* %x @@ -332,7 +318,6 @@ ; CHECK-NEXT: vfmv.v.f v26, fa0 ; CHECK-NEXT: vsetivli a0, 1, e32,mf2,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <1 x float>, <1 x float>* %x @@ -351,7 +336,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -369,7 +353,6 @@ ; CHECK-NEXT: vfmv.v.f v26, fa0 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; 
CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <2 x float>, <2 x float>* %x @@ -423,7 +406,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -441,7 +423,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <8 x float>, <8 x float>* %x @@ -460,7 +441,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -478,7 +458,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <16 x float>, <16 x float>* %x @@ -498,7 +477,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -517,7 +495,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <32 x float>, <32 x float>* %x @@ -540,7 +517,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -561,13 +537,11 @@ ; 
CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v16, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: vsetvli a0, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, a2, e32,m8,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <64 x float>, <64 x float>* %x @@ -652,7 +626,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.d fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -670,7 +643,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x double>, <4 x double>* %x @@ -689,7 +661,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.d fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -707,7 +678,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <8 x double>, <8 x double>* %x @@ -726,7 +696,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.d fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -744,7 +713,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli 
zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <16 x double>, <16 x double>* %x @@ -766,7 +734,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.d fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -786,13 +753,11 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v16, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <32 x double>, <32 x double>* %x @@ -813,7 +778,6 @@ ; CHECK-NEXT: vfmv.v.f v26, ft0 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <2 x half>, <2 x half>* %x @@ -834,7 +798,6 @@ ; CHECK-NEXT: vfmv.v.f v26, ft0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x half>, <4 x half>* %x @@ -853,7 +816,6 @@ ; CHECK-NEXT: vfmv.v.f v26, ft0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x half>, <4 x half>* %x @@ -872,7 +834,6 @@ ; CHECK-NEXT: vfmv.v.f v26, ft0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x half>, <4 x half>* %x @@ -897,7 
+858,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <128 x half>, <128 x half>* %x @@ -918,7 +878,6 @@ ; CHECK-NEXT: vfmv.v.f v26, ft0 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <2 x float>, <2 x float>* %x @@ -1005,7 +964,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <128 x float>, <128 x float>* %x @@ -1046,7 +1004,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x double>, <4 x double>* %x @@ -1065,7 +1022,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x double>, <4 x double>* %x @@ -1084,7 +1040,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x double>, <4 x double>* %x @@ -1108,7 +1063,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <32 x double>, <32 x double>* %x @@ -1129,7 +1083,6 @@ ; CHECK-NEXT: vfmv.v.f v26, ft0 ; CHECK-NEXT: vsetivli a0, 
2, e16,mf4,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <2 x half>, <2 x half>* %x @@ -1150,7 +1103,6 @@ ; CHECK-NEXT: vfmv.v.f v26, ft0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x half>, <4 x half>* %x @@ -1169,7 +1121,6 @@ ; CHECK-NEXT: vfmv.v.f v26, ft0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x half>, <4 x half>* %x @@ -1188,7 +1139,6 @@ ; CHECK-NEXT: vfmv.v.f v26, ft0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x half>, <4 x half>* %x @@ -1213,7 +1163,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <128 x half>, <128 x half>* %x @@ -1234,7 +1183,6 @@ ; CHECK-NEXT: vfmv.v.f v26, ft0 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <2 x float>, <2 x float>* %x @@ -1321,7 +1269,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <128 x float>, <128 x float>* %x @@ -1362,7 +1309,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v26, v25 -; CHECK-NEXT: vsetvli 
zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x double>, <4 x double>* %x @@ -1381,7 +1327,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x double>, <4 x double>* %x @@ -1400,7 +1345,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <4 x double>, <4 x double>* %x @@ -1424,7 +1368,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %v = load <32 x double>, <32 x double>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -27,7 +27,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; CHECK-NEXT: vredsum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i8>, <2 x i8>* %x @@ -46,7 +45,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; CHECK-NEXT: vredsum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i8>, <4 x i8>* %x @@ -65,7 +63,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i8>, <8 x i8>* %x @@ -103,7 +100,6 @@ 
; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i8>, <32 x i8>* %x @@ -123,7 +119,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu ; CHECK-NEXT: vredsum.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i8>, <64 x i8>* %x @@ -143,7 +138,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -166,7 +160,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -199,7 +192,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vredsum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i16>, <2 x i16>* %x @@ -218,7 +210,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i16>, <4 x i16>* %x @@ -255,7 +246,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i16>, <16 x i16>* %x @@ -275,7 +265,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu ; CHECK-NEXT: vredsum.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; 
CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i16>, <32 x i16>* %x @@ -295,7 +284,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -318,7 +306,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -351,7 +338,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i32>, <2 x i32>* %x @@ -388,7 +374,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i32>, <8 x i32>* %x @@ -407,7 +392,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; CHECK-NEXT: vredsum.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i32>, <16 x i32>* %x @@ -427,7 +411,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i32>, <32 x i32>* %x @@ -450,7 +433,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -526,7 +508,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli 
a0, 4, e64,m2,ta,mu ; RV32-NEXT: vredsum.vs v25, v26, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -542,7 +523,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vredsum.vs v25, v26, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <4 x i64>, <4 x i64>* %x @@ -561,7 +541,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vredsum.vs v25, v28, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -577,7 +556,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vredsum.vs v25, v28, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <8 x i64>, <8 x i64>* %x @@ -596,7 +574,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredsum.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -612,7 +589,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredsum.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <16 x i64>, <16 x i64>* %x @@ -634,7 +610,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredsum.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -653,7 +628,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredsum.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: 
vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -681,7 +655,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredsum.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -706,7 +679,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredsum.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <64 x i64>, <64 x i64>* %x @@ -739,7 +711,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; CHECK-NEXT: vredand.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i8>, <2 x i8>* %x @@ -758,7 +729,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; CHECK-NEXT: vredand.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i8>, <4 x i8>* %x @@ -777,7 +747,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu ; CHECK-NEXT: vredand.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i8>, <8 x i8>* %x @@ -815,7 +784,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu ; CHECK-NEXT: vredand.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i8>, <32 x i8>* %x @@ -835,7 +803,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu ; CHECK-NEXT: vredand.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i8>, <64 x i8>* %x @@ -855,7 +822,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu 
; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -878,7 +844,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -911,7 +876,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vredand.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i16>, <2 x i16>* %x @@ -930,7 +894,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vredand.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i16>, <4 x i16>* %x @@ -967,7 +930,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; CHECK-NEXT: vredand.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i16>, <16 x i16>* %x @@ -987,7 +949,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu ; CHECK-NEXT: vredand.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i16>, <32 x i16>* %x @@ -1007,7 +968,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -1030,7 +990,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 
x i16>, <128 x i16>* %x @@ -1063,7 +1022,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vredand.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i32>, <2 x i32>* %x @@ -1100,7 +1058,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; CHECK-NEXT: vredand.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i32>, <8 x i32>* %x @@ -1119,7 +1076,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; CHECK-NEXT: vredand.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i32>, <16 x i32>* %x @@ -1139,7 +1095,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i32>, <32 x i32>* %x @@ -1162,7 +1117,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -1238,7 +1192,6 @@ ; RV32-NEXT: vmv.v.i v25, -1 ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vredand.vs v25, v26, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1254,7 +1207,6 @@ ; RV64-NEXT: vmv.v.i v25, -1 ; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vredand.vs v25, v26, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <4 x i64>, <4 x i64>* %x @@ -1273,7 +1225,6 @@ ; RV32-NEXT: vmv.v.i v25, -1 ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: 
vredand.vs v25, v28, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1289,7 +1240,6 @@ ; RV64-NEXT: vmv.v.i v25, -1 ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vredand.vs v25, v28, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <8 x i64>, <8 x i64>* %x @@ -1308,7 +1258,6 @@ ; RV32-NEXT: vmv.v.i v25, -1 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredand.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1324,7 +1273,6 @@ ; RV64-NEXT: vmv.v.i v25, -1 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredand.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <16 x i64>, <16 x i64>* %x @@ -1346,7 +1294,6 @@ ; RV32-NEXT: vmv.v.i v25, -1 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredand.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1365,7 +1312,6 @@ ; RV64-NEXT: vmv.v.i v25, -1 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredand.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -1393,7 +1339,6 @@ ; RV32-NEXT: vmv.v.i v25, -1 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredand.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1418,7 +1363,6 @@ ; RV64-NEXT: vmv.v.i v25, -1 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredand.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, 
v25 ; RV64-NEXT: ret %v = load <64 x i64>, <64 x i64>* %x @@ -1451,7 +1395,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; CHECK-NEXT: vredor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i8>, <2 x i8>* %x @@ -1470,7 +1413,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; CHECK-NEXT: vredor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i8>, <4 x i8>* %x @@ -1489,7 +1431,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu ; CHECK-NEXT: vredor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i8>, <8 x i8>* %x @@ -1527,7 +1468,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu ; CHECK-NEXT: vredor.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i8>, <32 x i8>* %x @@ -1547,7 +1487,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu ; CHECK-NEXT: vredor.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i8>, <64 x i8>* %x @@ -1567,7 +1506,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -1590,7 +1528,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -1623,7 +1560,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: 
vredor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i16>, <2 x i16>* %x @@ -1642,7 +1578,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vredor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i16>, <4 x i16>* %x @@ -1679,7 +1614,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; CHECK-NEXT: vredor.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i16>, <16 x i16>* %x @@ -1699,7 +1633,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu ; CHECK-NEXT: vredor.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i16>, <32 x i16>* %x @@ -1719,7 +1652,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -1742,7 +1674,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -1775,7 +1706,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vredor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i32>, <2 x i32>* %x @@ -1812,7 +1742,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; CHECK-NEXT: vredor.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i32>, <8 x 
i32>* %x @@ -1831,7 +1760,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; CHECK-NEXT: vredor.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i32>, <16 x i32>* %x @@ -1851,7 +1779,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i32>, <32 x i32>* %x @@ -1874,7 +1801,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -1950,7 +1876,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vredor.vs v25, v26, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1966,7 +1891,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vredor.vs v25, v26, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <4 x i64>, <4 x i64>* %x @@ -1985,7 +1909,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vredor.vs v25, v28, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -2001,7 +1924,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vredor.vs v25, v28, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <8 x i64>, <8 x i64>* %x @@ -2020,7 +1942,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredor.vs v25, v8, 
v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -2036,7 +1957,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredor.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <16 x i64>, <16 x i64>* %x @@ -2058,7 +1978,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredor.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -2077,7 +1996,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredor.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -2105,7 +2023,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredor.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -2130,7 +2047,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredor.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <64 x i64>, <64 x i64>* %x @@ -2163,7 +2079,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; CHECK-NEXT: vredxor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i8>, <2 x i8>* %x @@ -2182,7 +2097,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; CHECK-NEXT: vredxor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i8>, <4 x 
i8>* %x @@ -2201,7 +2115,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i8>, <8 x i8>* %x @@ -2239,7 +2152,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i8>, <32 x i8>* %x @@ -2259,7 +2171,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu ; CHECK-NEXT: vredxor.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i8>, <64 x i8>* %x @@ -2279,7 +2190,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -2302,7 +2212,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -2335,7 +2244,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vredxor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i16>, <2 x i16>* %x @@ -2354,7 +2262,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i16>, <4 x i16>* %x @@ -2391,7 +2298,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v26, v25 -; CHECK-NEXT: 
vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i16>, <16 x i16>* %x @@ -2411,7 +2317,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu ; CHECK-NEXT: vredxor.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i16>, <32 x i16>* %x @@ -2431,7 +2336,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -2454,7 +2358,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -2487,7 +2390,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i32>, <2 x i32>* %x @@ -2524,7 +2426,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i32>, <8 x i32>* %x @@ -2543,7 +2444,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; CHECK-NEXT: vredxor.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i32>, <16 x i32>* %x @@ -2563,7 +2463,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i32>, <32 x i32>* %x @@ -2586,7 +2485,6 @@ 
; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -2662,7 +2560,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vredxor.vs v25, v26, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -2678,7 +2575,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vredxor.vs v25, v26, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <4 x i64>, <4 x i64>* %x @@ -2697,7 +2593,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vredxor.vs v25, v28, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -2713,7 +2608,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vredxor.vs v25, v28, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <8 x i64>, <8 x i64>* %x @@ -2732,7 +2626,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredxor.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -2748,7 +2641,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredxor.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <16 x i64>, <16 x i64>* %x @@ -2770,7 +2662,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredxor.vs v25, v8, v25 -; RV32-NEXT: 
vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -2789,7 +2680,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredxor.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -2817,7 +2707,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredxor.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -2842,7 +2731,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredxor.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <64 x i64>, <64 x i64>* %x @@ -2876,7 +2764,6 @@ ; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; CHECK-NEXT: vredmin.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i8>, <2 x i8>* %x @@ -2896,7 +2783,6 @@ ; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; CHECK-NEXT: vredmin.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i8>, <4 x i8>* %x @@ -2916,7 +2802,6 @@ ; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i8>, <8 x i8>* %x @@ -2956,7 +2841,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i8>, <32 x i8>* %x @@ -2977,7 
+2861,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu ; CHECK-NEXT: vredmin.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i8>, <64 x i8>* %x @@ -2998,7 +2881,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -3022,7 +2904,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -3057,7 +2938,6 @@ ; RV32-NEXT: vmv.v.x v26, a0 ; RV32-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; RV32-NEXT: vredmin.vs v25, v25, v26 -; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; @@ -3071,7 +2951,6 @@ ; RV64-NEXT: vmv.v.x v26, a0 ; RV64-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; RV64-NEXT: vredmin.vs v25, v25, v26 -; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <2 x i16>, <2 x i16>* %x @@ -3092,7 +2971,6 @@ ; RV32-NEXT: vmv.v.x v26, a0 ; RV32-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; RV32-NEXT: vredmin.vs v25, v25, v26 -; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; @@ -3106,7 +2984,6 @@ ; RV64-NEXT: vmv.v.x v26, a0 ; RV64-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; RV64-NEXT: vredmin.vs v25, v25, v26 -; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <4 x i16>, <4 x i16>* %x @@ -3160,7 +3037,6 @@ ; RV32-NEXT: vmv.v.x v25, a0 ; RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; RV32-NEXT: vredmin.vs v25, v26, v25 -; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; @@ -3174,7 +3050,6 
@@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; RV64-NEXT: vredmin.vs v25, v26, v25 -; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <16 x i16>, <16 x i16>* %x @@ -3196,7 +3071,6 @@ ; RV32-NEXT: vmv.v.x v25, a0 ; RV32-NEXT: vsetvli a0, a1, e16,m4,ta,mu ; RV32-NEXT: vredmin.vs v25, v28, v25 -; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; @@ -3211,7 +3085,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetvli a0, a1, e16,m4,ta,mu ; RV64-NEXT: vredmin.vs v25, v28, v25 -; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <32 x i16>, <32 x i16>* %x @@ -3233,7 +3106,6 @@ ; RV32-NEXT: vmv.v.x v25, a0 ; RV32-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; RV32-NEXT: vredmin.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; @@ -3248,7 +3120,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; RV64-NEXT: vredmin.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -3273,7 +3144,6 @@ ; RV32-NEXT: vmv.v.x v25, a0 ; RV32-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; RV32-NEXT: vredmin.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; @@ -3291,7 +3161,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; RV64-NEXT: vredmin.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -3326,7 +3195,6 @@ ; RV32-NEXT: vmv.v.x v26, a0 ; RV32-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; RV32-NEXT: vredmin.vs v25, v25, v26 -; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; @@ -3340,7 +3208,6 @@ ; RV64-NEXT: vmv.v.x v26, a0 ; RV64-NEXT: 
vsetivli a0, 2, e32,mf2,ta,mu ; RV64-NEXT: vredmin.vs v25, v25, v26 -; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <2 x i32>, <2 x i32>* %x @@ -3394,7 +3261,6 @@ ; RV32-NEXT: vmv.v.x v25, a0 ; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV32-NEXT: vredmin.vs v25, v26, v25 -; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; @@ -3408,7 +3274,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV64-NEXT: vredmin.vs v25, v26, v25 -; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <8 x i32>, <8 x i32>* %x @@ -3429,7 +3294,6 @@ ; RV32-NEXT: vmv.v.x v25, a0 ; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; RV32-NEXT: vredmin.vs v25, v28, v25 -; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; @@ -3443,7 +3307,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; RV64-NEXT: vredmin.vs v25, v28, v25 -; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <16 x i32>, <16 x i32>* %x @@ -3465,7 +3328,6 @@ ; RV32-NEXT: vmv.v.x v25, a0 ; RV32-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; RV32-NEXT: vredmin.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; @@ -3480,7 +3342,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; RV64-NEXT: vredmin.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <32 x i32>, <32 x i32>* %x @@ -3505,7 +3366,6 @@ ; RV32-NEXT: vmv.v.x v25, a0 ; RV32-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; RV32-NEXT: vredmin.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: ret ; @@ -3523,7 +3383,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; RV64-NEXT: vredmin.vs 
v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -3618,7 +3477,6 @@ ; RV32-NEXT: vlse64.v v25, (a0), zero ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vredmin.vs v25, v26, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -3637,7 +3495,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vredmin.vs v25, v26, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <4 x i64>, <4 x i64>* %x @@ -3664,7 +3521,6 @@ ; RV32-NEXT: vlse64.v v25, (a0), zero ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vredmin.vs v25, v28, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -3683,7 +3539,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vredmin.vs v25, v28, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <8 x i64>, <8 x i64>* %x @@ -3710,7 +3565,6 @@ ; RV32-NEXT: vlse64.v v25, (a0), zero ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredmin.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -3729,7 +3583,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredmin.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <16 x i64>, <16 x i64>* %x @@ -3759,7 +3612,6 @@ ; RV32-NEXT: vlse64.v v25, (a0), zero ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredmin.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s 
a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -3781,7 +3633,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredmin.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -3816,7 +3667,6 @@ ; RV32-NEXT: vlse64.v v25, (a0), zero ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredmin.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -3844,7 +3694,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredmin.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <64 x i64>, <64 x i64>* %x @@ -3878,7 +3727,6 @@ ; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; CHECK-NEXT: vredmax.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i8>, <2 x i8>* %x @@ -3898,7 +3746,6 @@ ; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; CHECK-NEXT: vredmax.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i8>, <4 x i8>* %x @@ -3918,7 +3765,6 @@ ; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i8>, <8 x i8>* %x @@ -3958,7 +3804,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i8>, <32 x i8>* %x @@ -3979,7 +3824,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; 
CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu ; CHECK-NEXT: vredmax.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i8>, <64 x i8>* %x @@ -4000,7 +3844,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -4024,7 +3867,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -4058,7 +3900,6 @@ ; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vredmax.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i16>, <2 x i16>* %x @@ -4078,7 +3919,6 @@ ; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i16>, <4 x i16>* %x @@ -4117,7 +3957,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i16>, <16 x i16>* %x @@ -4138,7 +3977,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu ; CHECK-NEXT: vredmax.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i16>, <32 x i16>* %x @@ -4159,7 +3997,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: 
vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -4183,7 +4020,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -4217,7 +4053,6 @@ ; CHECK-NEXT: vmv.v.x v26, a0 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i32>, <2 x i32>* %x @@ -4256,7 +4091,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i32>, <8 x i32>* %x @@ -4276,7 +4110,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; CHECK-NEXT: vredmax.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i32>, <16 x i32>* %x @@ -4297,7 +4130,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i32>, <32 x i32>* %x @@ -4321,7 +4153,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -4412,7 +4243,6 @@ ; RV32-NEXT: vlse64.v v25, (a0), zero ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vredmax.vs v25, v26, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -4431,7 +4261,6 @@ ; RV64-NEXT: 
vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vredmax.vs v25, v26, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <4 x i64>, <4 x i64>* %x @@ -4456,7 +4285,6 @@ ; RV32-NEXT: vlse64.v v25, (a0), zero ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vredmax.vs v25, v28, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -4475,7 +4303,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vredmax.vs v25, v28, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <8 x i64>, <8 x i64>* %x @@ -4500,7 +4327,6 @@ ; RV32-NEXT: vlse64.v v25, (a0), zero ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredmax.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -4519,7 +4345,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredmax.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <16 x i64>, <16 x i64>* %x @@ -4547,7 +4372,6 @@ ; RV32-NEXT: vlse64.v v25, (a0), zero ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredmax.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -4569,7 +4393,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredmax.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -4602,7 +4425,6 @@ ; RV32-NEXT: vlse64.v v25, (a0), zero ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredmax.vs 
v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -4630,7 +4452,6 @@ ; RV64-NEXT: vmv.v.x v25, a0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredmax.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <64 x i64>, <64 x i64>* %x @@ -4663,7 +4484,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; CHECK-NEXT: vredminu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i8>, <2 x i8>* %x @@ -4682,7 +4502,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; CHECK-NEXT: vredminu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i8>, <4 x i8>* %x @@ -4701,7 +4520,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i8>, <8 x i8>* %x @@ -4739,7 +4557,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i8>, <32 x i8>* %x @@ -4759,7 +4576,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu ; CHECK-NEXT: vredminu.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i8>, <64 x i8>* %x @@ -4779,7 +4595,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x 
i8>* %x @@ -4802,7 +4617,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -4835,7 +4649,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vredminu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i16>, <2 x i16>* %x @@ -4854,7 +4667,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i16>, <4 x i16>* %x @@ -4891,7 +4703,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i16>, <16 x i16>* %x @@ -4911,7 +4722,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu ; CHECK-NEXT: vredminu.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i16>, <32 x i16>* %x @@ -4931,7 +4741,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -4954,7 +4763,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -4987,7 +4795,6 @@ ; CHECK-NEXT: vmv.v.i v26, -1 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: 
vredminu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i32>, <2 x i32>* %x @@ -5024,7 +4831,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i32>, <8 x i32>* %x @@ -5043,7 +4849,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; CHECK-NEXT: vredminu.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i32>, <16 x i32>* %x @@ -5063,7 +4868,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i32>, <32 x i32>* %x @@ -5086,7 +4890,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -5162,7 +4965,6 @@ ; RV32-NEXT: vmv.v.i v25, -1 ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vredminu.vs v25, v26, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -5178,7 +4980,6 @@ ; RV64-NEXT: vmv.v.i v25, -1 ; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vredminu.vs v25, v26, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <4 x i64>, <4 x i64>* %x @@ -5197,7 +4998,6 @@ ; RV32-NEXT: vmv.v.i v25, -1 ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vredminu.vs v25, v28, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, 
zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -5213,7 +5013,6 @@ ; RV64-NEXT: vmv.v.i v25, -1 ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vredminu.vs v25, v28, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <8 x i64>, <8 x i64>* %x @@ -5232,7 +5031,6 @@ ; RV32-NEXT: vmv.v.i v25, -1 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredminu.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -5248,7 +5046,6 @@ ; RV64-NEXT: vmv.v.i v25, -1 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredminu.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <16 x i64>, <16 x i64>* %x @@ -5270,7 +5067,6 @@ ; RV32-NEXT: vmv.v.i v25, -1 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredminu.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -5289,7 +5085,6 @@ ; RV64-NEXT: vmv.v.i v25, -1 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredminu.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -5317,7 +5112,6 @@ ; RV32-NEXT: vmv.v.i v25, -1 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredminu.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -5342,7 +5136,6 @@ ; RV64-NEXT: vmv.v.i v25, -1 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredminu.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <64 x i64>, <64 x i64>* %x @@ -5375,7 +5168,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; 
CHECK-NEXT: vsetivli a0, 2, e8,mf8,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i8>, <2 x i8>* %x @@ -5394,7 +5186,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i8>, <4 x i8>* %x @@ -5413,7 +5204,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i8>, <8 x i8>* %x @@ -5451,7 +5241,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i8>, <32 x i8>* %x @@ -5471,7 +5260,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m4,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i8>, <64 x i8>* %x @@ -5491,7 +5279,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i8>, <128 x i8>* %x @@ -5514,7 +5301,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e8,m8,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <256 x i8>, <256 x i8>* %x @@ -5547,7 +5333,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 
; CHECK-NEXT: ret %v = load <2 x i16>, <2 x i16>* %x @@ -5566,7 +5351,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <4 x i16>, <4 x i16>* %x @@ -5603,7 +5387,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i16>, <16 x i16>* %x @@ -5623,7 +5406,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m4,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i16>, <32 x i16>* %x @@ -5643,7 +5425,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i16>, <64 x i16>* %x @@ -5666,7 +5447,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e16,m8,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <128 x i16>, <128 x i16>* %x @@ -5699,7 +5479,6 @@ ; CHECK-NEXT: vmv.v.i v26, 0 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v25, v26 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <2 x i32>, <2 x i32>* %x @@ -5736,7 +5515,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v26, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <8 x i32>, <8 x i32>* %x @@ -5755,7 +5533,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetivli a0, 
16, e32,m4,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v28, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <16 x i32>, <16 x i32>* %x @@ -5775,7 +5552,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <32 x i32>, <32 x i32>* %x @@ -5798,7 +5574,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, a1, e32,m8,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %v = load <64 x i32>, <64 x i32>* %x @@ -5874,7 +5649,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vredmaxu.vs v25, v26, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -5890,7 +5664,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vredmaxu.vs v25, v26, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <4 x i64>, <4 x i64>* %x @@ -5909,7 +5682,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vredmaxu.vs v25, v28, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -5925,7 +5697,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vredmaxu.vs v25, v28, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <8 x i64>, <8 x i64>* %x @@ -5944,7 +5715,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredmaxu.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s 
a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -5960,7 +5730,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredmaxu.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <16 x i64>, <16 x i64>* %x @@ -5982,7 +5751,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredmaxu.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -6001,7 +5769,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredmaxu.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <32 x i64>, <32 x i64>* %x @@ -6029,7 +5796,6 @@ ; RV32-NEXT: vmv.v.i v25, 0 ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vredmaxu.vs v25, v8, v25 -; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV32-NEXT: vmv.x.s a0, v25 ; RV32-NEXT: addi a1, zero, 32 ; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -6054,7 +5820,6 @@ ; RV64-NEXT: vmv.v.i v25, 0 ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vredmaxu.vs v25, v8, v25 -; RV64-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; RV64-NEXT: vmv.x.s a0, v25 ; RV64-NEXT: ret %v = load <64 x i64>, <64 x i64>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll @@ -7,20 +7,22 @@ define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) { ; CHECK-LABEL: select_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft3, 
v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft2, v25 ; CHECK-NEXT: bnez a0, .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 -; CHECK-NEXT: fmv.h ft2, ft3 +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: vsetivli a0, 1, e16,mf4,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB0_3 ; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vsetivli a0, 1, e16,mf4,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: vfmv.f.s ft1, v25 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu -; CHECK-NEXT: vfmv.v.f v8, ft2 +; CHECK-NEXT: vfmv.v.f v8, ft1 ; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: ret %v = select i1 %c, <2 x half> %a, <2 x half> %b @@ -31,23 +33,27 @@ ; CHECK-LABEL: selectcc_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: bnez a0, .LBB1_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vsetivli a1, 1, e16,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: j .LBB1_3 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: vsetivli a1, 1, e16,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB1_3: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: bnez a0, .LBB1_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 -; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: vsetivli a1, 2, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.v.f v25, ft0 -; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: bnez a0, .LBB1_5 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: j .LBB1_6 +; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB1_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.h ft0, ft1 -; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: .LBB1_6: ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; 
CHECK-NEXT: vmv1r.v v8, v25 @@ -62,44 +68,49 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB2_2 +; CHECK-NEXT: bnez a0, .LBB2_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: beqz a0, .LBB2_4 ; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: j .LBB2_5 +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: bnez a0, .LBB2_2 +; CHECK-NEXT: .LBB2_4: ; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: .LBB2_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB2_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB2_4: -; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: fsh ft0, 14(sp) +; CHECK-NEXT: bnez a0, .LBB2_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB2_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB2_6: -; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_7: ; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB2_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB2_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 
1 +; CHECK-NEXT: j .LBB2_11 +; CHECK-NEXT: .LBB2_10: +; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB2_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB2_8: -; CHECK-NEXT: fsh ft1, 10(sp) +; CHECK-NEXT: .LBB2_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 10(sp) ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vle16.v v8, (a0) @@ -115,44 +126,49 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: feq.h a0, fa0, fa1 -; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB3_2 +; CHECK-NEXT: bnez a0, .LBB3_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: beqz a0, .LBB3_4 ; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: j .LBB3_5 +; CHECK-NEXT: .LBB3_3: +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: bnez a0, .LBB3_2 +; CHECK-NEXT: .LBB3_4: ; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: .LBB3_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB3_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB3_4: -; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: fsh ft0, 14(sp) +; CHECK-NEXT: bnez a0, .LBB3_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB3_6 -; CHECK-NEXT: 
# %bb.5: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB3_6: -; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: j .LBB3_8 +; CHECK-NEXT: .LBB3_7: ; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB3_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB3_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB3_11 +; CHECK-NEXT: .LBB3_10: +; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB3_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB3_8: -; CHECK-NEXT: fsh ft1, 10(sp) +; CHECK-NEXT: .LBB3_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 10(sp) ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vle16.v v8, (a0) @@ -168,84 +184,93 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB4_2 +; CHECK-NEXT: bnez a0, .LBB4_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsh ft0, 0(sp) +; CHECK-NEXT: beqz a0, .LBB4_4 ; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 7 +; CHECK-NEXT: j .LBB4_5 +; CHECK-NEXT: .LBB4_3: +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsh ft0, 0(sp) +; CHECK-NEXT: bnez a0, .LBB4_2 +; CHECK-NEXT: .LBB4_4: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 7 +; CHECK-NEXT: .LBB4_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 7 -; CHECK-NEXT: vfmv.f.s 
ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: fsh ft0, 14(sp) +; CHECK-NEXT: bnez a0, .LBB4_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 6 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB4_8 +; CHECK-NEXT: .LBB4_7: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 6 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_6: -; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: .LBB4_8: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB4_10 +; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 5 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB4_11 +; CHECK-NEXT: .LBB4_10: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 5 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_8: -; CHECK-NEXT: fsh ft1, 10(sp) +; CHECK-NEXT: .LBB4_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 10(sp) +; CHECK-NEXT: bnez a0, .LBB4_13 +; CHECK-NEXT: # %bb.12: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 4 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB4_14 +; CHECK-NEXT: .LBB4_13: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 4 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_10: -; CHECK-NEXT: fsh ft1, 8(sp) +; CHECK-NEXT: .LBB4_14: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: bnez a0, .LBB4_16 +; CHECK-NEXT: # %bb.15: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: 
vslidedown.vi v25, v9, 3 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB4_17 +; CHECK-NEXT: .LBB4_16: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_12: -; CHECK-NEXT: fsh ft1, 6(sp) +; CHECK-NEXT: .LBB4_17: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 6(sp) +; CHECK-NEXT: bnez a0, .LBB4_19 +; CHECK-NEXT: # %bb.18: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_14 -; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_14: -; CHECK-NEXT: fsh ft1, 4(sp) +; CHECK-NEXT: j .LBB4_20 +; CHECK-NEXT: .LBB4_19: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB4_20: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 4(sp) +; CHECK-NEXT: bnez a0, .LBB4_22 +; CHECK-NEXT: # %bb.21: +; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB4_23 +; CHECK-NEXT: .LBB4_22: +; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_16 -; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_16: -; CHECK-NEXT: fsh ft1, 2(sp) +; CHECK-NEXT: .LBB4_23: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 2(sp) ; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu ; CHECK-NEXT: vle16.v v8, (sp) ; CHECK-NEXT: addi sp, sp, 16 @@ -260,84 +285,93 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: feq.h a0, fa0, fa1 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: 
bnez a0, .LBB5_2 +; CHECK-NEXT: bnez a0, .LBB5_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsh ft0, 0(sp) +; CHECK-NEXT: beqz a0, .LBB5_4 ; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 7 +; CHECK-NEXT: j .LBB5_5 +; CHECK-NEXT: .LBB5_3: +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsh ft0, 0(sp) +; CHECK-NEXT: bnez a0, .LBB5_2 +; CHECK-NEXT: .LBB5_4: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 7 +; CHECK-NEXT: .LBB5_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 7 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_4: -; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: fsh ft0, 14(sp) +; CHECK-NEXT: bnez a0, .LBB5_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 6 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB5_8 +; CHECK-NEXT: .LBB5_7: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 6 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_6: -; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: .LBB5_8: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB5_10 +; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 5 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB5_11 +; CHECK-NEXT: .LBB5_10: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 5 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_8: -; CHECK-NEXT: fsh ft1, 10(sp) +; 
CHECK-NEXT: .LBB5_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 10(sp) +; CHECK-NEXT: bnez a0, .LBB5_13 +; CHECK-NEXT: # %bb.12: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 4 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB5_14 +; CHECK-NEXT: .LBB5_13: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 4 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_10: -; CHECK-NEXT: fsh ft1, 8(sp) +; CHECK-NEXT: .LBB5_14: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: bnez a0, .LBB5_16 +; CHECK-NEXT: # %bb.15: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB5_17 +; CHECK-NEXT: .LBB5_16: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_12: -; CHECK-NEXT: fsh ft1, 6(sp) +; CHECK-NEXT: .LBB5_17: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 6(sp) +; CHECK-NEXT: bnez a0, .LBB5_19 +; CHECK-NEXT: # %bb.18: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_14 -; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_14: -; CHECK-NEXT: fsh ft1, 4(sp) +; CHECK-NEXT: j .LBB5_20 +; CHECK-NEXT: .LBB5_19: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB5_20: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 4(sp) +; CHECK-NEXT: bnez a0, .LBB5_22 +; CHECK-NEXT: # %bb.21: +; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu +; CHECK-NEXT: 
vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB5_23 +; CHECK-NEXT: .LBB5_22: +; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_16 -; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_16: -; CHECK-NEXT: fsh ft1, 2(sp) +; CHECK-NEXT: .LBB5_23: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 2(sp) ; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu ; CHECK-NEXT: vle16.v v8, (sp) ; CHECK-NEXT: addi sp, sp, 16 @@ -359,165 +393,182 @@ ; RV32-NEXT: addi s0, sp, 64 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB6_2 +; RV32-NEXT: bnez a0, .LBB6_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.h ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsh ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB6_4 ; RV32-NEXT: .LBB6_2: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 15 +; RV32-NEXT: j .LBB6_5 +; RV32-NEXT: .LBB6_3: +; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsh ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB6_2 +; RV32-NEXT: .LBB6_4: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 15 +; RV32-NEXT: .LBB6_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_4: -; RV32-NEXT: fsh ft1, 30(sp) +; RV32-NEXT: fsh ft0, 30(sp) +; RV32-NEXT: bnez a0, .LBB6_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 14 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: 
fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_6: -; RV32-NEXT: fsh ft1, 28(sp) +; RV32-NEXT: j .LBB6_8 +; RV32-NEXT: .LBB6_7: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 13 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vslidedown.vi v26, v8, 14 ; RV32-NEXT: .LBB6_8: -; RV32-NEXT: fsh ft1, 26(sp) -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 12 ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 12 -; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: fsh ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB6_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 13 +; RV32-NEXT: j .LBB6_11 ; RV32-NEXT: .LBB6_10: -; RV32-NEXT: fsh ft1, 24(sp) ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 11 +; RV32-NEXT: vslidedown.vi v26, v8, 13 +; RV32-NEXT: .LBB6_11: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_12: -; RV32-NEXT: fsh ft1, 22(sp) +; RV32-NEXT: fsh ft0, 26(sp) +; RV32-NEXT: bnez a0, .LBB6_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 10 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_14: -; RV32-NEXT: fsh ft1, 20(sp) +; RV32-NEXT: vslidedown.vi v26, v10, 12 +; RV32-NEXT: j .LBB6_14 +; RV32-NEXT: .LBB6_13: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 9 +; RV32-NEXT: vslidedown.vi v26, v8, 12 +; RV32-NEXT: .LBB6_14: ; RV32-NEXT: vfmv.f.s 
ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: fsh ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB6_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 11 +; RV32-NEXT: j .LBB6_17 ; RV32-NEXT: .LBB6_16: -; RV32-NEXT: fsh ft1, 18(sp) ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 8 +; RV32-NEXT: vslidedown.vi v26, v8, 11 +; RV32-NEXT: .LBB6_17: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_18: -; RV32-NEXT: fsh ft1, 16(sp) +; RV32-NEXT: fsh ft0, 22(sp) +; RV32-NEXT: bnez a0, .LBB6_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 7 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_20: -; RV32-NEXT: fsh ft1, 14(sp) +; RV32-NEXT: vslidedown.vi v26, v10, 10 +; RV32-NEXT: j .LBB6_20 +; RV32-NEXT: .LBB6_19: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 6 +; RV32-NEXT: vslidedown.vi v26, v8, 10 +; RV32-NEXT: .LBB6_20: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: fsh ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB6_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 9 +; RV32-NEXT: j .LBB6_23 ; RV32-NEXT: .LBB6_22: -; RV32-NEXT: fsh ft1, 12(sp) ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 5 +; RV32-NEXT: vslidedown.vi v26, v8, 9 +; RV32-NEXT: .LBB6_23: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: 
vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_24: -; RV32-NEXT: fsh ft1, 10(sp) +; RV32-NEXT: fsh ft0, 18(sp) +; RV32-NEXT: bnez a0, .LBB6_25 +; RV32-NEXT: # %bb.24: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 4 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_26: -; RV32-NEXT: fsh ft1, 8(sp) +; RV32-NEXT: vslidedown.vi v26, v10, 8 +; RV32-NEXT: j .LBB6_26 +; RV32-NEXT: .LBB6_25: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: vslidedown.vi v26, v8, 8 +; RV32-NEXT: .LBB6_26: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: fsh ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB6_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 7 +; RV32-NEXT: j .LBB6_29 ; RV32-NEXT: .LBB6_28: -; RV32-NEXT: fsh ft1, 6(sp) ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 2 +; RV32-NEXT: vslidedown.vi v26, v8, 7 +; RV32-NEXT: .LBB6_29: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_30 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_30: -; RV32-NEXT: fsh ft1, 4(sp) +; RV32-NEXT: fsh ft0, 14(sp) +; RV32-NEXT: bnez a0, .LBB6_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vslidedown.vi v26, v10, 6 +; RV32-NEXT: j .LBB6_32 +; RV32-NEXT: 
.LBB6_31: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 6 ; RV32-NEXT: .LBB6_32: -; RV32-NEXT: fsh ft1, 2(sp) -; RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB6_34 +; RV32-NEXT: # %bb.33: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 5 +; RV32-NEXT: j .LBB6_35 +; RV32-NEXT: .LBB6_34: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 5 +; RV32-NEXT: .LBB6_35: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 10(sp) +; RV32-NEXT: bnez a0, .LBB6_37 +; RV32-NEXT: # %bb.36: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 4 +; RV32-NEXT: j .LBB6_38 +; RV32-NEXT: .LBB6_37: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 4 +; RV32-NEXT: .LBB6_38: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB6_40 +; RV32-NEXT: # %bb.39: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: j .LBB6_41 +; RV32-NEXT: .LBB6_40: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: .LBB6_41: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 6(sp) +; RV32-NEXT: bnez a0, .LBB6_43 +; RV32-NEXT: # %bb.42: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 2 +; RV32-NEXT: j .LBB6_44 +; RV32-NEXT: .LBB6_43: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB6_44: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 4(sp) +; RV32-NEXT: bnez a0, .LBB6_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB6_47 +; RV32-NEXT: .LBB6_46: +; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 1 +; RV32-NEXT: .LBB6_47: +; RV32-NEXT: vfmv.f.s ft0, v26 +; 
RV32-NEXT: fsh ft0, 2(sp) +; RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; RV32-NEXT: vle16.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 ; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload @@ -536,164 +587,181 @@ ; RV64-NEXT: addi s0, sp, 64 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB6_2 +; RV64-NEXT: bnez a0, .LBB6_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.h ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsh ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB6_4 ; RV64-NEXT: .LBB6_2: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 15 +; RV64-NEXT: j .LBB6_5 +; RV64-NEXT: .LBB6_3: +; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsh ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB6_2 +; RV64-NEXT: .LBB6_4: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 15 +; RV64-NEXT: .LBB6_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_4: -; RV64-NEXT: fsh ft1, 30(sp) +; RV64-NEXT: fsh ft0, 30(sp) +; RV64-NEXT: bnez a0, .LBB6_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 14 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_8 +; RV64-NEXT: .LBB6_7: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_6: -; RV64-NEXT: fsh ft1, 28(sp) +; RV64-NEXT: .LBB6_8: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 28(sp) +; RV64-NEXT: bnez a0, .LBB6_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; 
RV64-NEXT: vslidedown.vi v26, v10, 13 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_11 +; RV64-NEXT: .LBB6_10: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 13 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_8: -; RV64-NEXT: fsh ft1, 26(sp) +; RV64-NEXT: .LBB6_11: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 26(sp) +; RV64-NEXT: bnez a0, .LBB6_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 12 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_14 +; RV64-NEXT: .LBB6_13: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 12 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_10: -; RV64-NEXT: fsh ft1, 24(sp) +; RV64-NEXT: .LBB6_14: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB6_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 11 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_17 +; RV64-NEXT: .LBB6_16: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_12: -; RV64-NEXT: fsh ft1, 22(sp) +; RV64-NEXT: .LBB6_17: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 22(sp) +; RV64-NEXT: bnez a0, .LBB6_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 10 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_20 +; RV64-NEXT: .LBB6_19: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.h ft1, ft0 -; 
RV64-NEXT: .LBB6_14: -; RV64-NEXT: fsh ft1, 20(sp) +; RV64-NEXT: .LBB6_20: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 20(sp) +; RV64-NEXT: bnez a0, .LBB6_22 +; RV64-NEXT: # %bb.21: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 9 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_23 +; RV64-NEXT: .LBB6_22: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_16: -; RV64-NEXT: fsh ft1, 18(sp) +; RV64-NEXT: .LBB6_23: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 18(sp) +; RV64-NEXT: bnez a0, .LBB6_25 +; RV64-NEXT: # %bb.24: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 8 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_26 +; RV64-NEXT: .LBB6_25: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_18: -; RV64-NEXT: fsh ft1, 16(sp) +; RV64-NEXT: .LBB6_26: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB6_28 +; RV64-NEXT: # %bb.27: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 7 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_29 +; RV64-NEXT: .LBB6_28: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_20: -; RV64-NEXT: fsh ft1, 14(sp) +; RV64-NEXT: .LBB6_29: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 14(sp) +; RV64-NEXT: bnez a0, .LBB6_31 +; RV64-NEXT: # %bb.30: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_32 
+; RV64-NEXT: .LBB6_31: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_22: -; RV64-NEXT: fsh ft1, 12(sp) +; RV64-NEXT: .LBB6_32: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB6_34 +; RV64-NEXT: # %bb.33: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_35 +; RV64-NEXT: .LBB6_34: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_24 -; RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_24: -; RV64-NEXT: fsh ft1, 10(sp) +; RV64-NEXT: .LBB6_35: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 10(sp) +; RV64-NEXT: bnez a0, .LBB6_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_38 +; RV64-NEXT: .LBB6_37: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_26: -; RV64-NEXT: fsh ft1, 8(sp) +; RV64-NEXT: .LBB6_38: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 8(sp) +; RV64-NEXT: bnez a0, .LBB6_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_41 +; RV64-NEXT: .LBB6_40: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_28: -; RV64-NEXT: fsh ft1, 6(sp) +; RV64-NEXT: .LBB6_41: +; RV64-NEXT: vfmv.f.s ft0, v26 +; 
RV64-NEXT: fsh ft0, 6(sp) +; RV64-NEXT: bnez a0, .LBB6_43 +; RV64-NEXT: # %bb.42: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_30: -; RV64-NEXT: fsh ft1, 4(sp) +; RV64-NEXT: j .LBB6_44 +; RV64-NEXT: .LBB6_43: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB6_44: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 4(sp) +; RV64-NEXT: bnez a0, .LBB6_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB6_47 +; RV64-NEXT: .LBB6_46: +; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_32: -; RV64-NEXT: fsh ft1, 2(sp) +; RV64-NEXT: .LBB6_47: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 2(sp) ; RV64-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; RV64-NEXT: vle16.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -718,164 +786,181 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 ; RV32-NEXT: feq.h a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB7_2 +; RV32-NEXT: bnez a0, .LBB7_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.h ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsh ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB7_4 ; RV32-NEXT: .LBB7_2: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 15 +; RV32-NEXT: j .LBB7_5 +; RV32-NEXT: .LBB7_3: +; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; 
RV32-NEXT: fsh ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB7_2 +; RV32-NEXT: .LBB7_4: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 15 +; RV32-NEXT: .LBB7_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_4: -; RV32-NEXT: fsh ft1, 30(sp) +; RV32-NEXT: fsh ft0, 30(sp) +; RV32-NEXT: bnez a0, .LBB7_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 14 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_8 +; RV32-NEXT: .LBB7_7: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_6: -; RV32-NEXT: fsh ft1, 28(sp) +; RV32-NEXT: .LBB7_8: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 28(sp) +; RV32-NEXT: bnez a0, .LBB7_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 13 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_11 +; RV32-NEXT: .LBB7_10: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_8: -; RV32-NEXT: fsh ft1, 26(sp) +; RV32-NEXT: .LBB7_11: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 26(sp) +; RV32-NEXT: bnez a0, .LBB7_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 12 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_14 +; RV32-NEXT: .LBB7_13: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 12 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_10: 
-; RV32-NEXT: fsh ft1, 24(sp) +; RV32-NEXT: .LBB7_14: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB7_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 11 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_17 +; RV32-NEXT: .LBB7_16: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_12: -; RV32-NEXT: fsh ft1, 22(sp) +; RV32-NEXT: .LBB7_17: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 22(sp) +; RV32-NEXT: bnez a0, .LBB7_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 10 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_20 +; RV32-NEXT: .LBB7_19: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_14: -; RV32-NEXT: fsh ft1, 20(sp) +; RV32-NEXT: .LBB7_20: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 20(sp) +; RV32-NEXT: bnez a0, .LBB7_22 +; RV32-NEXT: # %bb.21: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 9 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_23 +; RV32-NEXT: .LBB7_22: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_16: -; RV32-NEXT: fsh ft1, 18(sp) +; RV32-NEXT: .LBB7_23: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 18(sp) +; RV32-NEXT: bnez a0, .LBB7_25 +; RV32-NEXT: # %bb.24: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 8 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_26 +; RV32-NEXT: 
.LBB7_25: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_18: -; RV32-NEXT: fsh ft1, 16(sp) +; RV32-NEXT: .LBB7_26: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB7_28 +; RV32-NEXT: # %bb.27: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 7 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_29 +; RV32-NEXT: .LBB7_28: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_20: -; RV32-NEXT: fsh ft1, 14(sp) +; RV32-NEXT: .LBB7_29: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 14(sp) +; RV32-NEXT: bnez a0, .LBB7_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_32 +; RV32-NEXT: .LBB7_31: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_22: -; RV32-NEXT: fsh ft1, 12(sp) +; RV32-NEXT: .LBB7_32: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB7_34 +; RV32-NEXT: # %bb.33: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_35 +; RV32-NEXT: .LBB7_34: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_24: -; RV32-NEXT: fsh ft1, 10(sp) +; RV32-NEXT: .LBB7_35: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: 
fsh ft0, 10(sp) +; RV32-NEXT: bnez a0, .LBB7_37 +; RV32-NEXT: # %bb.36: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_38 +; RV32-NEXT: .LBB7_37: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_26: -; RV32-NEXT: fsh ft1, 8(sp) +; RV32-NEXT: .LBB7_38: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB7_40 +; RV32-NEXT: # %bb.39: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_41 +; RV32-NEXT: .LBB7_40: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_28: -; RV32-NEXT: fsh ft1, 6(sp) +; RV32-NEXT: .LBB7_41: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 6(sp) +; RV32-NEXT: bnez a0, .LBB7_43 +; RV32-NEXT: # %bb.42: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_30 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_30: -; RV32-NEXT: fsh ft1, 4(sp) +; RV32-NEXT: j .LBB7_44 +; RV32-NEXT: .LBB7_43: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB7_44: ; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 4(sp) +; RV32-NEXT: bnez a0, .LBB7_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB7_47 +; RV32-NEXT: .LBB7_46: +; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV32-NEXT: 
vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_32: -; RV32-NEXT: fsh ft1, 2(sp) +; RV32-NEXT: .LBB7_47: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 2(sp) ; RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; RV32-NEXT: vle16.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 @@ -896,164 +981,181 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 ; RV64-NEXT: feq.h a0, fa0, fa1 -; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB7_2 +; RV64-NEXT: bnez a0, .LBB7_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.h ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsh ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB7_4 ; RV64-NEXT: .LBB7_2: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 15 +; RV64-NEXT: j .LBB7_5 +; RV64-NEXT: .LBB7_3: +; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsh ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB7_2 +; RV64-NEXT: .LBB7_4: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 15 +; RV64-NEXT: .LBB7_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_4: -; RV64-NEXT: fsh ft1, 30(sp) +; RV64-NEXT: fsh ft0, 30(sp) +; RV64-NEXT: bnez a0, .LBB7_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 14 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_6: -; RV64-NEXT: fsh ft1, 28(sp) +; RV64-NEXT: j .LBB7_8 +; RV64-NEXT: .LBB7_7: ; RV64-NEXT: vsetivli a1, 
1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 13 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 13 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: vslidedown.vi v26, v8, 14 ; RV64-NEXT: .LBB7_8: -; RV64-NEXT: fsh ft1, 26(sp) -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 12 ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 12 -; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: fsh ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB7_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 13 +; RV64-NEXT: j .LBB7_11 ; RV64-NEXT: .LBB7_10: -; RV64-NEXT: fsh ft1, 24(sp) ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 11 +; RV64-NEXT: vslidedown.vi v26, v8, 13 +; RV64-NEXT: .LBB7_11: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_12: -; RV64-NEXT: fsh ft1, 22(sp) +; RV64-NEXT: fsh ft0, 26(sp) +; RV64-NEXT: bnez a0, .LBB7_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 10 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_14: -; RV64-NEXT: fsh ft1, 20(sp) +; RV64-NEXT: vslidedown.vi v26, v10, 12 +; RV64-NEXT: j .LBB7_14 +; RV64-NEXT: .LBB7_13: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 9 +; RV64-NEXT: vslidedown.vi v26, v8, 12 +; RV64-NEXT: .LBB7_14: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: fsh ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB7_16 ; 
RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 11 +; RV64-NEXT: j .LBB7_17 ; RV64-NEXT: .LBB7_16: -; RV64-NEXT: fsh ft1, 18(sp) ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 8 +; RV64-NEXT: vslidedown.vi v26, v8, 11 +; RV64-NEXT: .LBB7_17: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_18: -; RV64-NEXT: fsh ft1, 16(sp) +; RV64-NEXT: fsh ft0, 22(sp) +; RV64-NEXT: bnez a0, .LBB7_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 7 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_20: -; RV64-NEXT: fsh ft1, 14(sp) +; RV64-NEXT: vslidedown.vi v26, v10, 10 +; RV64-NEXT: j .LBB7_20 +; RV64-NEXT: .LBB7_19: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 6 +; RV64-NEXT: vslidedown.vi v26, v8, 10 +; RV64-NEXT: .LBB7_20: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: fsh ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB7_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 9 +; RV64-NEXT: j .LBB7_23 ; RV64-NEXT: .LBB7_22: -; RV64-NEXT: fsh ft1, 12(sp) ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 5 +; RV64-NEXT: vslidedown.vi v26, v8, 9 +; RV64-NEXT: .LBB7_23: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 18(sp) +; RV64-NEXT: bnez a0, .LBB7_25 +; RV64-NEXT: # %bb.24: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 8 +; RV64-NEXT: j .LBB7_26 +; 
RV64-NEXT: .LBB7_25: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 8 +; RV64-NEXT: .LBB7_26: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB7_28 +; RV64-NEXT: # %bb.27: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 7 +; RV64-NEXT: j .LBB7_29 +; RV64-NEXT: .LBB7_28: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 7 +; RV64-NEXT: .LBB7_29: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 14(sp) +; RV64-NEXT: bnez a0, .LBB7_31 +; RV64-NEXT: # %bb.30: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 6 +; RV64-NEXT: j .LBB7_32 +; RV64-NEXT: .LBB7_31: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 6 +; RV64-NEXT: .LBB7_32: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB7_34 +; RV64-NEXT: # %bb.33: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 5 +; RV64-NEXT: j .LBB7_35 +; RV64-NEXT: .LBB7_34: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_24 -; RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_24: -; RV64-NEXT: fsh ft1, 10(sp) +; RV64-NEXT: .LBB7_35: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 10(sp) +; RV64-NEXT: bnez a0, .LBB7_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB7_38 +; RV64-NEXT: .LBB7_37: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_26: -; RV64-NEXT: fsh ft1, 8(sp) +; RV64-NEXT: .LBB7_38: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 8(sp) +; RV64-NEXT: bnez 
a0, .LBB7_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB7_41 +; RV64-NEXT: .LBB7_40: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_28: -; RV64-NEXT: fsh ft1, 6(sp) +; RV64-NEXT: .LBB7_41: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 6(sp) +; RV64-NEXT: bnez a0, .LBB7_43 +; RV64-NEXT: # %bb.42: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_30: -; RV64-NEXT: fsh ft1, 4(sp) +; RV64-NEXT: j .LBB7_44 +; RV64-NEXT: .LBB7_43: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB7_44: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 4(sp) +; RV64-NEXT: bnez a0, .LBB7_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB7_47 +; RV64-NEXT: .LBB7_46: +; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_32: -; RV64-NEXT: fsh ft1, 2(sp) +; RV64-NEXT: .LBB7_47: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 2(sp) ; RV64-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; RV64-NEXT: vle16.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -1069,20 +1171,22 @@ define <2 x float> @select_v2f32(i1 zeroext %c, <2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: select_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu -; 
CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft3, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft2, v25 ; CHECK-NEXT: bnez a0, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.s ft0, ft1 -; CHECK-NEXT: fmv.s ft2, ft3 +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: vsetivli a0, 1, e32,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB8_3 ; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vsetivli a0, 1, e32,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB8_3: +; CHECK-NEXT: vfmv.f.s ft1, v25 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu -; CHECK-NEXT: vfmv.v.f v8, ft2 +; CHECK-NEXT: vfmv.v.f v8, ft1 ; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: ret %v = select i1 %c, <2 x float> %a, <2 x float> %b @@ -1093,23 +1197,27 @@ ; CHECK-LABEL: selectcc_v2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: feq.s a0, fa0, fa1 +; CHECK-NEXT: bnez a0, .LBB9_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vsetivli a1, 1, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: j .LBB9_3 +; CHECK-NEXT: .LBB9_2: +; CHECK-NEXT: vsetivli a1, 1, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB9_3: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: bnez a0, .LBB9_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.s ft0, ft1 -; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: vsetivli a1, 2, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.v.f v25, ft0 -; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: bnez a0, .LBB9_5 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: j .LBB9_6 +; CHECK-NEXT: .LBB9_5: +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB9_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.s ft0, 
ft1 -; CHECK-NEXT: .LBB9_4: +; CHECK-NEXT: .LBB9_6: ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -1124,44 +1232,49 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB10_2 +; CHECK-NEXT: bnez a0, .LBB10_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.s ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsw ft0, 0(sp) +; CHECK-NEXT: beqz a0, .LBB10_4 ; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: j .LBB10_5 +; CHECK-NEXT: .LBB10_3: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsw ft0, 0(sp) +; CHECK-NEXT: bnez a0, .LBB10_2 +; CHECK-NEXT: .LBB10_4: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: .LBB10_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB10_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB10_4: -; CHECK-NEXT: fsw ft1, 12(sp) +; CHECK-NEXT: fsw ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB10_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB10_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB10_6: -; CHECK-NEXT: fsw ft1, 8(sp) +; CHECK-NEXT: j .LBB10_8 +; CHECK-NEXT: .LBB10_7: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB10_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsw ft0, 
8(sp) +; CHECK-NEXT: bnez a0, .LBB10_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB10_11 +; CHECK-NEXT: .LBB10_10: +; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB10_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB10_8: -; CHECK-NEXT: fsw ft1, 4(sp) +; CHECK-NEXT: .LBB10_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsw ft0, 4(sp) ; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu ; CHECK-NEXT: vle32.v v8, (sp) ; CHECK-NEXT: addi sp, sp, 16 @@ -1176,44 +1289,49 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: feq.s a0, fa0, fa1 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB11_2 +; CHECK-NEXT: bnez a0, .LBB11_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.s ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsw ft0, 0(sp) +; CHECK-NEXT: beqz a0, .LBB11_4 ; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: j .LBB11_5 +; CHECK-NEXT: .LBB11_3: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsw ft0, 0(sp) +; CHECK-NEXT: bnez a0, .LBB11_2 +; CHECK-NEXT: .LBB11_4: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: .LBB11_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB11_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB11_4: -; CHECK-NEXT: fsw ft1, 12(sp) +; CHECK-NEXT: fsw ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB11_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 
2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB11_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB11_6: -; CHECK-NEXT: fsw ft1, 8(sp) +; CHECK-NEXT: j .LBB11_8 +; CHECK-NEXT: .LBB11_7: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB11_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsw ft0, 8(sp) +; CHECK-NEXT: bnez a0, .LBB11_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB11_11 +; CHECK-NEXT: .LBB11_10: +; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB11_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB11_8: -; CHECK-NEXT: fsw ft1, 4(sp) +; CHECK-NEXT: .LBB11_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsw ft0, 4(sp) ; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu ; CHECK-NEXT: vle32.v v8, (sp) ; CHECK-NEXT: addi sp, sp, 16 @@ -1235,84 +1353,93 @@ ; RV32-NEXT: addi s0, sp, 64 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB12_2 +; RV32-NEXT: bnez a0, .LBB12_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB12_4 ; RV32-NEXT: .LBB12_2: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 7 +; RV32-NEXT: j .LBB12_5 +; RV32-NEXT: .LBB12_3: +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB12_2 +; RV32-NEXT: .LBB12_4: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; 
RV32-NEXT: vslidedown.vi v26, v10, 7 +; RV32-NEXT: .LBB12_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_4: -; RV32-NEXT: fsw ft1, 28(sp) +; RV32-NEXT: fsw ft0, 28(sp) +; RV32-NEXT: bnez a0, .LBB12_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB12_8 +; RV32-NEXT: .LBB12_7: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_6: -; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: .LBB12_8: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB12_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB12_11 +; RV32-NEXT: .LBB12_10: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_8: -; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: .LBB12_11: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 20(sp) +; RV32-NEXT: bnez a0, .LBB12_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB12_14 +; RV32-NEXT: .LBB12_13: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_10: -; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: .LBB12_14: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 
16(sp) +; RV32-NEXT: bnez a0, .LBB12_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB12_17 +; RV32-NEXT: .LBB12_16: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_12: -; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: .LBB12_17: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB12_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_14: -; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: j .LBB12_20 +; RV32-NEXT: .LBB12_19: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB12_20: ; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB12_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB12_23 +; RV32-NEXT: .LBB12_22: +; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_16: -; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: .LBB12_23: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 4(sp) ; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV32-NEXT: vle32.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 @@ -1332,84 +1459,93 @@ ; RV64-NEXT: addi s0, sp, 64 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: vsetvli zero, zero, 
e32,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB12_2 +; RV64-NEXT: bnez a0, .LBB12_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB12_4 ; RV64-NEXT: .LBB12_2: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 7 +; RV64-NEXT: j .LBB12_5 +; RV64-NEXT: .LBB12_3: +; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB12_2 +; RV64-NEXT: .LBB12_4: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 7 +; RV64-NEXT: .LBB12_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_4: -; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: fsw ft0, 28(sp) +; RV64-NEXT: bnez a0, .LBB12_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB12_8 +; RV64-NEXT: .LBB12_7: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_6: -; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: .LBB12_8: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB12_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB12_11 +; RV64-NEXT: .LBB12_10: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.s ft1, ft0 -; 
RV64-NEXT: .LBB12_8: -; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: .LBB12_11: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 20(sp) +; RV64-NEXT: bnez a0, .LBB12_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB12_14 +; RV64-NEXT: .LBB12_13: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_10: -; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: .LBB12_14: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB12_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB12_17 +; RV64-NEXT: .LBB12_16: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_12: -; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: .LBB12_17: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB12_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_14: -; RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: j .LBB12_20 +; RV64-NEXT: .LBB12_19: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB12_20: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 8(sp) +; RV64-NEXT: bnez a0, .LBB12_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu 
+; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB12_23 +; RV64-NEXT: .LBB12_22: +; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_16: -; RV64-NEXT: fsw ft1, 4(sp) +; RV64-NEXT: .LBB12_23: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 4(sp) ; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV64-NEXT: vle32.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -1434,84 +1570,93 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 ; RV32-NEXT: feq.s a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB13_2 +; RV32-NEXT: bnez a0, .LBB13_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB13_4 ; RV32-NEXT: .LBB13_2: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 7 +; RV32-NEXT: j .LBB13_5 +; RV32-NEXT: .LBB13_3: +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB13_2 +; RV32-NEXT: .LBB13_4: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 7 +; RV32-NEXT: .LBB13_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_4: -; RV32-NEXT: fsw ft1, 28(sp) +; RV32-NEXT: fsw ft0, 28(sp) +; RV32-NEXT: bnez a0, .LBB13_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB13_8 +; RV32-NEXT: .LBB13_7: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, 
v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_6: -; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: .LBB13_8: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB13_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB13_11 +; RV32-NEXT: .LBB13_10: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_8: -; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: .LBB13_11: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 20(sp) +; RV32-NEXT: bnez a0, .LBB13_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB13_14 +; RV32-NEXT: .LBB13_13: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_10: -; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: .LBB13_14: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB13_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB13_17 +; RV32-NEXT: .LBB13_16: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_12: -; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: .LBB13_17: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB13_19 +; RV32-NEXT: # %bb.18: 
; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_14: -; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: j .LBB13_20 +; RV32-NEXT: .LBB13_19: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB13_20: ; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB13_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB13_23 +; RV32-NEXT: .LBB13_22: +; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_16: -; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: .LBB13_23: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 4(sp) ; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV32-NEXT: vle32.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 @@ -1532,84 +1677,93 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 ; RV64-NEXT: feq.s a0, fa0, fa1 -; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB13_2 +; RV64-NEXT: bnez a0, .LBB13_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB13_4 ; RV64-NEXT: .LBB13_2: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 7 +; RV64-NEXT: j .LBB13_5 +; RV64-NEXT: .LBB13_3: +; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB13_2 +; 
RV64-NEXT: .LBB13_4: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 7 +; RV64-NEXT: .LBB13_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_4: -; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: fsw ft0, 28(sp) +; RV64-NEXT: bnez a0, .LBB13_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB13_8 +; RV64-NEXT: .LBB13_7: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_6: -; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: .LBB13_8: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB13_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB13_11 +; RV64-NEXT: .LBB13_10: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_8: -; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: .LBB13_11: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 20(sp) +; RV64-NEXT: bnez a0, .LBB13_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB13_14 +; RV64-NEXT: .LBB13_13: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_10: -; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: 
.LBB13_14: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB13_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB13_17 +; RV64-NEXT: .LBB13_16: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_12: -; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: .LBB13_17: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB13_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_14: -; RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: j .LBB13_20 +; RV64-NEXT: .LBB13_19: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB13_20: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 8(sp) +; RV64-NEXT: bnez a0, .LBB13_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB13_23 +; RV64-NEXT: .LBB13_22: +; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_16: -; RV64-NEXT: fsw ft1, 4(sp) +; RV64-NEXT: .LBB13_23: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 4(sp) ; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV64-NEXT: vle32.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -1634,164 +1788,181 @@ ; RV32-NEXT: addi s0, sp, 128 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; 
RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v12 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB14_2 +; RV32-NEXT: bnez a0, .LBB14_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v12 +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB14_4 ; RV32-NEXT: .LBB14_2: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 15 +; RV32-NEXT: j .LBB14_5 +; RV32-NEXT: .LBB14_3: +; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB14_2 +; RV32-NEXT: .LBB14_4: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 15 +; RV32-NEXT: .LBB14_5: ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_4: -; RV32-NEXT: fsw ft1, 60(sp) +; RV32-NEXT: fsw ft0, 60(sp) +; RV32-NEXT: bnez a0, .LBB14_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 14 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_8 +; RV32-NEXT: .LBB14_7: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_6: -; RV32-NEXT: fsw ft1, 56(sp) +; RV32-NEXT: .LBB14_8: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 56(sp) +; RV32-NEXT: bnez a0, .LBB14_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 13 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_11 +; RV32-NEXT: .LBB14_10: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez 
a0, .LBB14_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_8: -; RV32-NEXT: fsw ft1, 52(sp) +; RV32-NEXT: .LBB14_11: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 52(sp) +; RV32-NEXT: bnez a0, .LBB14_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 12 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_14 +; RV32-NEXT: .LBB14_13: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 12 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_10: -; RV32-NEXT: fsw ft1, 48(sp) +; RV32-NEXT: .LBB14_14: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 48(sp) +; RV32-NEXT: bnez a0, .LBB14_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 11 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_17 +; RV32-NEXT: .LBB14_16: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_12: -; RV32-NEXT: fsw ft1, 44(sp) +; RV32-NEXT: .LBB14_17: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 44(sp) +; RV32-NEXT: bnez a0, .LBB14_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 10 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_20 +; RV32-NEXT: .LBB14_19: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_14: -; RV32-NEXT: fsw ft1, 40(sp) +; RV32-NEXT: .LBB14_20: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB14_22 +; RV32-NEXT: # %bb.21: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; 
RV32-NEXT: vslidedown.vi v28, v12, 9 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_23 +; RV32-NEXT: .LBB14_22: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_16: -; RV32-NEXT: fsw ft1, 36(sp) +; RV32-NEXT: .LBB14_23: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 36(sp) +; RV32-NEXT: bnez a0, .LBB14_25 +; RV32-NEXT: # %bb.24: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 8 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_26 +; RV32-NEXT: .LBB14_25: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_18: -; RV32-NEXT: fsw ft1, 32(sp) +; RV32-NEXT: .LBB14_26: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 32(sp) +; RV32-NEXT: bnez a0, .LBB14_28 +; RV32-NEXT: # %bb.27: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_29 +; RV32-NEXT: .LBB14_28: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_20: -; RV32-NEXT: fsw ft1, 28(sp) +; RV32-NEXT: .LBB14_29: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 28(sp) +; RV32-NEXT: bnez a0, .LBB14_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_32 +; RV32-NEXT: .LBB14_31: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.s 
ft1, ft0 -; RV32-NEXT: .LBB14_22: -; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: .LBB14_32: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB14_34 +; RV32-NEXT: # %bb.33: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_35 +; RV32-NEXT: .LBB14_34: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_24: -; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: .LBB14_35: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 20(sp) +; RV32-NEXT: bnez a0, .LBB14_37 +; RV32-NEXT: # %bb.36: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_38 +; RV32-NEXT: .LBB14_37: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_26: -; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: .LBB14_38: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB14_40 +; RV32-NEXT: # %bb.39: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_41 +; RV32-NEXT: .LBB14_40: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_28: -; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: .LBB14_41: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB14_43 +; RV32-NEXT: # %bb.42: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 -; RV32-NEXT: vfmv.f.s 
ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_30 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_30: -; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: j .LBB14_44 +; RV32-NEXT: .LBB14_43: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: .LBB14_44: ; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB14_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: j .LBB14_47 +; RV32-NEXT: .LBB14_46: +; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_32: -; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: .LBB14_47: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 4(sp) ; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; RV32-NEXT: vle32.v v8, (sp) ; RV32-NEXT: addi sp, s0, -128 @@ -1811,164 +1982,181 @@ ; RV64-NEXT: addi s0, sp, 128 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v12 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB14_2 +; RV64-NEXT: bnez a0, .LBB14_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v12 +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB14_4 ; RV64-NEXT: .LBB14_2: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v8, 15 +; RV64-NEXT: j .LBB14_5 +; RV64-NEXT: .LBB14_3: +; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB14_2 +; RV64-NEXT: .LBB14_4: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 15 +; 
RV64-NEXT: .LBB14_5: ; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_4: -; RV64-NEXT: fsw ft1, 60(sp) +; RV64-NEXT: fsw ft0, 60(sp) +; RV64-NEXT: bnez a0, .LBB14_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 14 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_8 +; RV64-NEXT: .LBB14_7: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_6: -; RV64-NEXT: fsw ft1, 56(sp) +; RV64-NEXT: .LBB14_8: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB14_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 13 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_11 +; RV64-NEXT: .LBB14_10: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 13 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_8: -; RV64-NEXT: fsw ft1, 52(sp) +; RV64-NEXT: .LBB14_11: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 52(sp) +; RV64-NEXT: bnez a0, .LBB14_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 12 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_14 +; RV64-NEXT: .LBB14_13: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 12 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_10: -; RV64-NEXT: fsw ft1, 48(sp) +; RV64-NEXT: .LBB14_14: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 48(sp) +; RV64-NEXT: bnez a0, 
.LBB14_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 11 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_17 +; RV64-NEXT: .LBB14_16: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_12: -; RV64-NEXT: fsw ft1, 44(sp) +; RV64-NEXT: .LBB14_17: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 44(sp) +; RV64-NEXT: bnez a0, .LBB14_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 10 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_20 +; RV64-NEXT: .LBB14_19: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_14: -; RV64-NEXT: fsw ft1, 40(sp) +; RV64-NEXT: .LBB14_20: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB14_22 +; RV64-NEXT: # %bb.21: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 9 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_23 +; RV64-NEXT: .LBB14_22: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_16: -; RV64-NEXT: fsw ft1, 36(sp) +; RV64-NEXT: .LBB14_23: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 36(sp) +; RV64-NEXT: bnez a0, .LBB14_25 +; RV64-NEXT: # %bb.24: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 8 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_26 +; RV64-NEXT: .LBB14_25: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, 
v28 -; RV64-NEXT: bnez a0, .LBB14_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_18: -; RV64-NEXT: fsw ft1, 32(sp) +; RV64-NEXT: .LBB14_26: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 32(sp) +; RV64-NEXT: bnez a0, .LBB14_28 +; RV64-NEXT: # %bb.27: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 7 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_29 +; RV64-NEXT: .LBB14_28: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_20: -; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: .LBB14_29: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 28(sp) +; RV64-NEXT: bnez a0, .LBB14_31 +; RV64-NEXT: # %bb.30: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_32 +; RV64-NEXT: .LBB14_31: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_22: -; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: .LBB14_32: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB14_34 +; RV64-NEXT: # %bb.33: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_35 +; RV64-NEXT: .LBB14_34: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_24 -; RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_24: -; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: .LBB14_35: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 20(sp) +; RV64-NEXT: bnez a0, .LBB14_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli 
a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_38 +; RV64-NEXT: .LBB14_37: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_26: -; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: .LBB14_38: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB14_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_41 +; RV64-NEXT: .LBB14_40: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_28: -; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: .LBB14_41: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB14_43 +; RV64-NEXT: # %bb.42: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_30: -; RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: j .LBB14_44 +; RV64-NEXT: .LBB14_43: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: .LBB14_44: ; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 8(sp) +; RV64-NEXT: bnez a0, .LBB14_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: j .LBB14_47 +; RV64-NEXT: .LBB14_46: +; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v28 -; 
RV64-NEXT: bnez a0, .LBB14_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_32: -; RV64-NEXT: fsw ft1, 4(sp) +; RV64-NEXT: .LBB14_47: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 4(sp) ; RV64-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; RV64-NEXT: vle32.v v8, (sp) ; RV64-NEXT: addi sp, s0, -128 @@ -1993,164 +2181,181 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -64 ; RV32-NEXT: feq.s a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v12 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB15_2 +; RV32-NEXT: bnez a0, .LBB15_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v12 +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB15_4 ; RV32-NEXT: .LBB15_2: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 15 +; RV32-NEXT: j .LBB15_5 +; RV32-NEXT: .LBB15_3: +; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB15_2 +; RV32-NEXT: .LBB15_4: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 15 +; RV32-NEXT: .LBB15_5: ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_4: -; RV32-NEXT: fsw ft1, 60(sp) +; RV32-NEXT: fsw ft0, 60(sp) +; RV32-NEXT: bnez a0, .LBB15_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 14 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_6: -; RV32-NEXT: fsw ft1, 56(sp) +; RV32-NEXT: j .LBB15_8 +; RV32-NEXT: .LBB15_7: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: 
vslidedown.vi v28, v12, 13 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: vslidedown.vi v28, v8, 14 ; RV32-NEXT: .LBB15_8: -; RV32-NEXT: fsw ft1, 52(sp) -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 12 ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 12 -; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: fsw ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB15_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 13 +; RV32-NEXT: j .LBB15_11 ; RV32-NEXT: .LBB15_10: -; RV32-NEXT: fsw ft1, 48(sp) ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 11 +; RV32-NEXT: vslidedown.vi v28, v8, 13 +; RV32-NEXT: .LBB15_11: ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_12: -; RV32-NEXT: fsw ft1, 44(sp) +; RV32-NEXT: fsw ft0, 52(sp) +; RV32-NEXT: bnez a0, .LBB15_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 10 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_14: -; RV32-NEXT: fsw ft1, 40(sp) +; RV32-NEXT: vslidedown.vi v28, v12, 12 +; RV32-NEXT: j .LBB15_14 +; RV32-NEXT: .LBB15_13: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 9 +; RV32-NEXT: vslidedown.vi v28, v8, 12 +; RV32-NEXT: .LBB15_14: ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: fsw ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB15_16 ; RV32-NEXT: # 
%bb.15: -; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 11 +; RV32-NEXT: j .LBB15_17 ; RV32-NEXT: .LBB15_16: -; RV32-NEXT: fsw ft1, 36(sp) ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 8 +; RV32-NEXT: vslidedown.vi v28, v8, 11 +; RV32-NEXT: .LBB15_17: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 44(sp) +; RV32-NEXT: bnez a0, .LBB15_19 +; RV32-NEXT: # %bb.18: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 10 +; RV32-NEXT: j .LBB15_20 +; RV32-NEXT: .LBB15_19: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 10 +; RV32-NEXT: .LBB15_20: ; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB15_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 9 +; RV32-NEXT: j .LBB15_23 +; RV32-NEXT: .LBB15_22: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 9 +; RV32-NEXT: .LBB15_23: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 36(sp) +; RV32-NEXT: bnez a0, .LBB15_25 +; RV32-NEXT: # %bb.24: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 8 +; RV32-NEXT: j .LBB15_26 +; RV32-NEXT: .LBB15_25: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_18: -; RV32-NEXT: fsw ft1, 32(sp) +; RV32-NEXT: .LBB15_26: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 32(sp) +; RV32-NEXT: bnez a0, .LBB15_28 +; RV32-NEXT: # %bb.27: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB15_29 +; RV32-NEXT: .LBB15_28: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v28 -; 
RV32-NEXT: bnez a0, .LBB15_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_20: -; RV32-NEXT: fsw ft1, 28(sp) +; RV32-NEXT: .LBB15_29: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 28(sp) +; RV32-NEXT: bnez a0, .LBB15_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB15_32 +; RV32-NEXT: .LBB15_31: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_22: -; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: .LBB15_32: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB15_34 +; RV32-NEXT: # %bb.33: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB15_35 +; RV32-NEXT: .LBB15_34: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_24: -; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: .LBB15_35: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 20(sp) +; RV32-NEXT: bnez a0, .LBB15_37 +; RV32-NEXT: # %bb.36: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB15_38 +; RV32-NEXT: .LBB15_37: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_26: -; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: .LBB15_38: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB15_40 +; RV32-NEXT: # %bb.39: ; RV32-NEXT: vsetivli a1, 1, 
e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB15_41 +; RV32-NEXT: .LBB15_40: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_28: -; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: .LBB15_41: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB15_43 +; RV32-NEXT: # %bb.42: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_30 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_30: -; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: j .LBB15_44 +; RV32-NEXT: .LBB15_43: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: .LBB15_44: ; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB15_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: j .LBB15_47 +; RV32-NEXT: .LBB15_46: +; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_32: -; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: .LBB15_47: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 4(sp) ; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; RV32-NEXT: vle32.v v8, (sp) ; RV32-NEXT: addi sp, s0, -128 @@ -2171,164 +2376,181 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: feq.s a0, fa0, fa1 -; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v12 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez 
a0, .LBB15_2 +; RV64-NEXT: bnez a0, .LBB15_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v12 +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB15_4 ; RV64-NEXT: .LBB15_2: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v8, 15 +; RV64-NEXT: j .LBB15_5 +; RV64-NEXT: .LBB15_3: +; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB15_2 +; RV64-NEXT: .LBB15_4: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 15 +; RV64-NEXT: .LBB15_5: ; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_4: -; RV64-NEXT: fsw ft1, 60(sp) +; RV64-NEXT: fsw ft0, 60(sp) +; RV64-NEXT: bnez a0, .LBB15_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 14 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_8 +; RV64-NEXT: .LBB15_7: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_6: -; RV64-NEXT: fsw ft1, 56(sp) +; RV64-NEXT: .LBB15_8: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB15_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 13 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_11 +; RV64-NEXT: .LBB15_10: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 13 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_8: -; RV64-NEXT: fsw ft1, 52(sp) +; RV64-NEXT: .LBB15_11: +; RV64-NEXT: 
vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 52(sp) +; RV64-NEXT: bnez a0, .LBB15_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 12 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_14 +; RV64-NEXT: .LBB15_13: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 12 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_10: -; RV64-NEXT: fsw ft1, 48(sp) +; RV64-NEXT: .LBB15_14: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 48(sp) +; RV64-NEXT: bnez a0, .LBB15_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 11 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_17 +; RV64-NEXT: .LBB15_16: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_12: -; RV64-NEXT: fsw ft1, 44(sp) +; RV64-NEXT: .LBB15_17: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 44(sp) +; RV64-NEXT: bnez a0, .LBB15_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 10 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_20 +; RV64-NEXT: .LBB15_19: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_14: -; RV64-NEXT: fsw ft1, 40(sp) +; RV64-NEXT: .LBB15_20: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB15_22 +; RV64-NEXT: # %bb.21: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 9 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_23 +; RV64-NEXT: .LBB15_22: +; RV64-NEXT: vsetivli a1, 1, 
e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_16: -; RV64-NEXT: fsw ft1, 36(sp) +; RV64-NEXT: .LBB15_23: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 36(sp) +; RV64-NEXT: bnez a0, .LBB15_25 +; RV64-NEXT: # %bb.24: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 8 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_26 +; RV64-NEXT: .LBB15_25: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_18: -; RV64-NEXT: fsw ft1, 32(sp) +; RV64-NEXT: .LBB15_26: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 32(sp) +; RV64-NEXT: bnez a0, .LBB15_28 +; RV64-NEXT: # %bb.27: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 7 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_29 +; RV64-NEXT: .LBB15_28: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_20: -; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: .LBB15_29: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 28(sp) +; RV64-NEXT: bnez a0, .LBB15_31 +; RV64-NEXT: # %bb.30: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_32 +; RV64-NEXT: .LBB15_31: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_22: -; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: .LBB15_32: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 24(sp) +; 
RV64-NEXT: bnez a0, .LBB15_34 +; RV64-NEXT: # %bb.33: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_35 +; RV64-NEXT: .LBB15_34: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_24 -; RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_24: -; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: .LBB15_35: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 20(sp) +; RV64-NEXT: bnez a0, .LBB15_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_38 +; RV64-NEXT: .LBB15_37: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_26: -; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: .LBB15_38: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB15_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_41 +; RV64-NEXT: .LBB15_40: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_28: -; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: .LBB15_41: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB15_43 +; RV64-NEXT: # %bb.42: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: 
fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_30: -; RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: j .LBB15_44 +; RV64-NEXT: .LBB15_43: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: .LBB15_44: ; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 8(sp) +; RV64-NEXT: bnez a0, .LBB15_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: j .LBB15_47 +; RV64-NEXT: .LBB15_46: +; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_32: -; RV64-NEXT: fsw ft1, 4(sp) +; RV64-NEXT: .LBB15_47: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 4(sp) ; RV64-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; RV64-NEXT: vle32.v v8, (sp) ; RV64-NEXT: addi sp, s0, -128 @@ -2344,20 +2566,22 @@ define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: select_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft3, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft2, v25 ; CHECK-NEXT: bnez a0, .LBB16_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.d ft0, ft1 -; CHECK-NEXT: fmv.d ft2, ft3 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB16_3 ; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB16_3: +; CHECK-NEXT: vfmv.f.s ft1, v25 ; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu -; CHECK-NEXT: vfmv.v.f v8, ft2 
+; CHECK-NEXT: vfmv.v.f v8, ft1 ; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: ret %v = select i1 %c, <2 x double> %a, <2 x double> %b @@ -2368,23 +2592,27 @@ ; CHECK-LABEL: selectcc_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: feq.d a0, fa0, fa1 +; CHECK-NEXT: bnez a0, .LBB17_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: j .LBB17_3 +; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB17_3: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: bnez a0, .LBB17_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.d ft0, ft1 -; CHECK-NEXT: .LBB17_2: ; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; CHECK-NEXT: vfmv.v.f v25, ft0 -; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: bnez a0, .LBB17_5 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: j .LBB17_6 +; CHECK-NEXT: .LBB17_5: +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB17_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.d ft0, ft1 -; CHECK-NEXT: .LBB17_4: +; CHECK-NEXT: .LBB17_6: ; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -2406,44 +2634,49 @@ ; RV32-NEXT: addi s0, sp, 64 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB18_2 +; RV32-NEXT: bnez a0, .LBB18_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB18_4 ; RV32-NEXT: .LBB18_2: +; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: j .LBB18_5 +; RV32-NEXT: .LBB18_3: +; RV32-NEXT: vsetvli zero, zero, 
e64,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB18_2 +; RV32-NEXT: .LBB18_4: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: .LBB18_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB18_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB18_4: -; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB18_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB18_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB18_6: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: j .LBB18_8 +; RV32-NEXT: .LBB18_7: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB18_8: ; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsd ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB18_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB18_11 +; RV32-NEXT: .LBB18_10: +; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB18_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB18_8: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: .LBB18_11: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 @@ -2463,44 +2696,49 @@ ; RV64-NEXT: addi s0, sp, 64 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; 
RV64-NEXT: bnez a0, .LBB18_2 +; RV64-NEXT: bnez a0, .LBB18_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB18_4 ; RV64-NEXT: .LBB18_2: +; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 3 +; RV64-NEXT: j .LBB18_5 +; RV64-NEXT: .LBB18_3: +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB18_2 +; RV64-NEXT: .LBB18_4: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 +; RV64-NEXT: .LBB18_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB18_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB18_4: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB18_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB18_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB18_6: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB18_8 +; RV64-NEXT: .LBB18_7: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB18_8: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsd ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB18_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB18_11 +; RV64-NEXT: .LBB18_10: +; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB18_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB18_8: -; RV64-NEXT: 
fsd ft1, 8(sp) +; RV64-NEXT: .LBB18_11: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -2525,44 +2763,49 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 ; RV32-NEXT: feq.d a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB19_2 +; RV32-NEXT: bnez a0, .LBB19_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB19_4 ; RV32-NEXT: .LBB19_2: +; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: j .LBB19_5 +; RV32-NEXT: .LBB19_3: +; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB19_2 +; RV32-NEXT: .LBB19_4: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: .LBB19_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB19_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB19_4: -; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB19_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB19_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB19_6: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: j .LBB19_8 +; RV32-NEXT: .LBB19_7: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB19_8: ; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsd 
ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB19_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB19_11 +; RV32-NEXT: .LBB19_10: +; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB19_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB19_8: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: .LBB19_11: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 @@ -2583,44 +2826,49 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 ; RV64-NEXT: feq.d a0, fa0, fa1 -; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB19_2 +; RV64-NEXT: bnez a0, .LBB19_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB19_4 ; RV64-NEXT: .LBB19_2: +; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 3 +; RV64-NEXT: j .LBB19_5 +; RV64-NEXT: .LBB19_3: +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB19_2 +; RV64-NEXT: .LBB19_4: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 +; RV64-NEXT: .LBB19_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB19_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB19_4: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB19_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: 
vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB19_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB19_6: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB19_8 +; RV64-NEXT: .LBB19_7: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB19_8: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsd ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB19_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB19_11 +; RV64-NEXT: .LBB19_10: +; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB19_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB19_8: -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: .LBB19_11: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -2645,84 +2893,93 @@ ; RV32-NEXT: addi s0, sp, 128 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v12 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB20_2 +; RV32-NEXT: bnez a0, .LBB20_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v12 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB20_4 ; RV32-NEXT: .LBB20_2: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 7 +; RV32-NEXT: j .LBB20_5 +; RV32-NEXT: .LBB20_3: +; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB20_2 +; RV32-NEXT: .LBB20_4: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 +; RV32-NEXT: .LBB20_5: ; RV32-NEXT: 
vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_4: -; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: fsd ft0, 56(sp) +; RV32-NEXT: bnez a0, .LBB20_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB20_8 +; RV32-NEXT: .LBB20_7: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_6: -; RV32-NEXT: fsd ft1, 48(sp) +; RV32-NEXT: .LBB20_8: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 48(sp) +; RV32-NEXT: bnez a0, .LBB20_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB20_11 +; RV32-NEXT: .LBB20_10: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_8: -; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: .LBB20_11: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB20_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB20_14 +; RV32-NEXT: .LBB20_13: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_10: -; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: .LBB20_14: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 32(sp) +; RV32-NEXT: bnez a0, .LBB20_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: 
vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB20_17 +; RV32-NEXT: .LBB20_16: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_12: -; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: .LBB20_17: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB20_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_14: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: j .LBB20_20 +; RV32-NEXT: .LBB20_19: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: .LBB20_20: ; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB20_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: j .LBB20_23 +; RV32-NEXT: .LBB20_22: +; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_16: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: .LBB20_23: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -128 @@ -2742,84 +2999,93 @@ ; RV64-NEXT: addi s0, sp, 128 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v12 -; RV64-NEXT: vfmv.f.s ft0, v8 -; 
RV64-NEXT: bnez a0, .LBB20_2 +; RV64-NEXT: bnez a0, .LBB20_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v12 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB20_4 ; RV64-NEXT: .LBB20_2: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v8, 7 +; RV64-NEXT: j .LBB20_5 +; RV64-NEXT: .LBB20_3: +; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB20_2 +; RV64-NEXT: .LBB20_4: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 7 +; RV64-NEXT: .LBB20_5: ; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_4: -; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: fsd ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB20_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB20_8 +; RV64-NEXT: .LBB20_7: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_6: -; RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: .LBB20_8: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 48(sp) +; RV64-NEXT: bnez a0, .LBB20_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB20_11 +; RV64-NEXT: .LBB20_10: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_8: -; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: .LBB20_11: +; 
RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB20_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB20_14 +; RV64-NEXT: .LBB20_13: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_10: -; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: .LBB20_14: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 32(sp) +; RV64-NEXT: bnez a0, .LBB20_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB20_17 +; RV64-NEXT: .LBB20_16: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_12: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: .LBB20_17: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB20_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_14: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB20_20 +; RV64-NEXT: .LBB20_19: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu -; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: .LBB20_20: ; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB20_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: j .LBB20_23 +; 
RV64-NEXT: .LBB20_22: +; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_16: -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: .LBB20_23: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -128 @@ -2844,84 +3110,93 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -64 ; RV32-NEXT: feq.d a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v12 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB21_2 +; RV32-NEXT: bnez a0, .LBB21_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v12 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB21_4 ; RV32-NEXT: .LBB21_2: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 7 +; RV32-NEXT: j .LBB21_5 +; RV32-NEXT: .LBB21_3: +; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB21_2 +; RV32-NEXT: .LBB21_4: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 +; RV32-NEXT: .LBB21_5: ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_4: -; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: fsd ft0, 56(sp) +; RV32-NEXT: bnez a0, .LBB21_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB21_8 +; RV32-NEXT: .LBB21_7: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_6 
-; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_6: -; RV32-NEXT: fsd ft1, 48(sp) +; RV32-NEXT: .LBB21_8: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 48(sp) +; RV32-NEXT: bnez a0, .LBB21_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB21_11 +; RV32-NEXT: .LBB21_10: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_8: -; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: .LBB21_11: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB21_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB21_14 +; RV32-NEXT: .LBB21_13: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_10: -; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: .LBB21_14: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 32(sp) +; RV32-NEXT: bnez a0, .LBB21_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB21_17 +; RV32-NEXT: .LBB21_16: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_12: -; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: .LBB21_17: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB21_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi 
v28, v12, 2 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_14: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: j .LBB21_20 +; RV32-NEXT: .LBB21_19: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: .LBB21_20: ; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB21_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: j .LBB21_23 +; RV32-NEXT: .LBB21_22: +; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_16: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: .LBB21_23: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -128 @@ -2942,84 +3217,93 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: feq.d a0, fa0, fa1 -; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v12 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB21_2 +; RV64-NEXT: bnez a0, .LBB21_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v12 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB21_4 ; RV64-NEXT: .LBB21_2: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v8, 7 +; RV64-NEXT: j .LBB21_5 +; RV64-NEXT: .LBB21_3: +; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB21_2 +; RV64-NEXT: .LBB21_4: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; 
RV64-NEXT: vslidedown.vi v28, v12, 7 +; RV64-NEXT: .LBB21_5: ; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_4: -; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: fsd ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB21_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB21_8 +; RV64-NEXT: .LBB21_7: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_6: -; RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: .LBB21_8: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 48(sp) +; RV64-NEXT: bnez a0, .LBB21_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB21_11 +; RV64-NEXT: .LBB21_10: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_8: -; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: .LBB21_11: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB21_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB21_14 +; RV64-NEXT: .LBB21_13: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_10: -; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: .LBB21_14: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 
32(sp) +; RV64-NEXT: bnez a0, .LBB21_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB21_17 +; RV64-NEXT: .LBB21_16: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_12: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: .LBB21_17: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB21_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_14: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB21_20 +; RV64-NEXT: .LBB21_19: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu -; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: .LBB21_20: ; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB21_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: j .LBB21_23 +; RV64-NEXT: .LBB21_22: +; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_16: -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: .LBB21_23: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -128 @@ -3044,164 +3328,181 @@ ; RV32-NEXT: addi s0, sp, 256 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -128 -; RV32-NEXT: vsetvli zero, zero, 
e64,m8,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v16 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB22_2 +; RV32-NEXT: bnez a0, .LBB22_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v16 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB22_4 ; RV32-NEXT: .LBB22_2: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 15 +; RV32-NEXT: j .LBB22_5 +; RV32-NEXT: .LBB22_3: +; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB22_2 +; RV32-NEXT: .LBB22_4: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 15 +; RV32-NEXT: .LBB22_5: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_4: -; RV32-NEXT: fsd ft1, 120(sp) +; RV32-NEXT: fsd ft0, 120(sp) +; RV32-NEXT: bnez a0, .LBB22_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 14 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_6: -; RV32-NEXT: fsd ft1, 112(sp) +; RV32-NEXT: j .LBB22_8 +; RV32-NEXT: .LBB22_7: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 13 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: vslidedown.vi v24, v8, 14 ; RV32-NEXT: .LBB22_8: -; RV32-NEXT: fsd ft1, 104(sp) -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 12 ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 12 -; RV32-NEXT: vfmv.f.s 
ft1, v24 +; RV32-NEXT: fsd ft0, 112(sp) ; RV32-NEXT: bnez a0, .LBB22_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 13 +; RV32-NEXT: j .LBB22_11 ; RV32-NEXT: .LBB22_10: -; RV32-NEXT: fsd ft1, 96(sp) ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 11 +; RV32-NEXT: vslidedown.vi v24, v8, 13 +; RV32-NEXT: .LBB22_11: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_12: -; RV32-NEXT: fsd ft1, 88(sp) +; RV32-NEXT: fsd ft0, 104(sp) +; RV32-NEXT: bnez a0, .LBB22_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 10 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_14: -; RV32-NEXT: fsd ft1, 80(sp) +; RV32-NEXT: vslidedown.vi v24, v16, 12 +; RV32-NEXT: j .LBB22_14 +; RV32-NEXT: .LBB22_13: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 9 +; RV32-NEXT: vslidedown.vi v24, v8, 12 +; RV32-NEXT: .LBB22_14: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: fsd ft0, 96(sp) ; RV32-NEXT: bnez a0, .LBB22_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 11 +; RV32-NEXT: j .LBB22_17 ; RV32-NEXT: .LBB22_16: -; RV32-NEXT: fsd ft1, 72(sp) ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 8 +; RV32-NEXT: vslidedown.vi v24, v8, 11 +; RV32-NEXT: .LBB22_17: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_18 -; 
RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_18: -; RV32-NEXT: fsd ft1, 64(sp) +; RV32-NEXT: fsd ft0, 88(sp) +; RV32-NEXT: bnez a0, .LBB22_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 7 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_20: -; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: vslidedown.vi v24, v16, 10 +; RV32-NEXT: j .LBB22_20 +; RV32-NEXT: .LBB22_19: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 6 +; RV32-NEXT: vslidedown.vi v24, v8, 10 +; RV32-NEXT: .LBB22_20: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: fsd ft0, 80(sp) ; RV32-NEXT: bnez a0, .LBB22_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 9 +; RV32-NEXT: j .LBB22_23 ; RV32-NEXT: .LBB22_22: -; RV32-NEXT: fsd ft1, 48(sp) ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 5 +; RV32-NEXT: vslidedown.vi v24, v8, 9 +; RV32-NEXT: .LBB22_23: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_24: -; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: fsd ft0, 72(sp) +; RV32-NEXT: bnez a0, .LBB22_25 +; RV32-NEXT: # %bb.24: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 4 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_26: -; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: vslidedown.vi v24, v16, 8 +; RV32-NEXT: j 
.LBB22_26 +; RV32-NEXT: .LBB22_25: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 3 +; RV32-NEXT: vslidedown.vi v24, v8, 8 +; RV32-NEXT: .LBB22_26: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: fsd ft0, 64(sp) ; RV32-NEXT: bnez a0, .LBB22_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 7 +; RV32-NEXT: j .LBB22_29 ; RV32-NEXT: .LBB22_28: -; RV32-NEXT: fsd ft1, 24(sp) ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 2 +; RV32-NEXT: vslidedown.vi v24, v8, 7 +; RV32-NEXT: .LBB22_29: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_30 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_30: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: fsd ft0, 56(sp) +; RV32-NEXT: bnez a0, .LBB22_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v16, v16, 1 -; RV32-NEXT: vfmv.f.s ft0, v16 -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v8 -; RV32-NEXT: bnez a0, .LBB22_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: vslidedown.vi v24, v16, 6 +; RV32-NEXT: j .LBB22_32 +; RV32-NEXT: .LBB22_31: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 6 ; RV32-NEXT: .LBB22_32: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 48(sp) +; RV32-NEXT: bnez a0, .LBB22_34 +; RV32-NEXT: # %bb.33: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 5 +; RV32-NEXT: j .LBB22_35 +; RV32-NEXT: .LBB22_34: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 5 +; RV32-NEXT: .LBB22_35: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB22_37 +; RV32-NEXT: 
# %bb.36: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 4 +; RV32-NEXT: j .LBB22_38 +; RV32-NEXT: .LBB22_37: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 4 +; RV32-NEXT: .LBB22_38: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 32(sp) +; RV32-NEXT: bnez a0, .LBB22_40 +; RV32-NEXT: # %bb.39: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 3 +; RV32-NEXT: j .LBB22_41 +; RV32-NEXT: .LBB22_40: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: .LBB22_41: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB22_43 +; RV32-NEXT: # %bb.42: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 2 +; RV32-NEXT: j .LBB22_44 +; RV32-NEXT: .LBB22_43: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 2 +; RV32-NEXT: .LBB22_44: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB22_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v16, 1 +; RV32-NEXT: j .LBB22_47 +; RV32-NEXT: .LBB22_46: +; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: .LBB22_47: +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -256 @@ -3221,164 +3522,181 @@ ; RV64-NEXT: addi s0, sp, 256 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -128 -; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v16 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB22_2 +; RV64-NEXT: bnez a0, .LBB22_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v16 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB22_4 ; RV64-NEXT: .LBB22_2: 
+; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v8, 15 +; RV64-NEXT: j .LBB22_5 +; RV64-NEXT: .LBB22_3: +; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB22_2 +; RV64-NEXT: .LBB22_4: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 15 +; RV64-NEXT: .LBB22_5: ; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: vslidedown.vi v24, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_4: -; RV64-NEXT: fsd ft1, 120(sp) +; RV64-NEXT: fsd ft0, 120(sp) +; RV64-NEXT: bnez a0, .LBB22_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 14 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_8 +; RV64-NEXT: .LBB22_7: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_6: -; RV64-NEXT: fsd ft1, 112(sp) +; RV64-NEXT: .LBB22_8: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 112(sp) +; RV64-NEXT: bnez a0, .LBB22_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 13 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_11 +; RV64-NEXT: .LBB22_10: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 13 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_8: -; RV64-NEXT: fsd ft1, 104(sp) +; RV64-NEXT: .LBB22_11: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 104(sp) +; RV64-NEXT: bnez a0, .LBB22_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 12 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_14 +; 
RV64-NEXT: .LBB22_13: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 12 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_10: -; RV64-NEXT: fsd ft1, 96(sp) +; RV64-NEXT: .LBB22_14: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 96(sp) +; RV64-NEXT: bnez a0, .LBB22_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 11 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_17 +; RV64-NEXT: .LBB22_16: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_12: -; RV64-NEXT: fsd ft1, 88(sp) +; RV64-NEXT: .LBB22_17: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 88(sp) +; RV64-NEXT: bnez a0, .LBB22_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 10 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_20 +; RV64-NEXT: .LBB22_19: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_14: -; RV64-NEXT: fsd ft1, 80(sp) +; RV64-NEXT: .LBB22_20: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 80(sp) +; RV64-NEXT: bnez a0, .LBB22_22 +; RV64-NEXT: # %bb.21: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 9 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_23 +; RV64-NEXT: .LBB22_22: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_16: -; RV64-NEXT: fsd ft1, 72(sp) +; RV64-NEXT: .LBB22_23: +; 
RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 72(sp) +; RV64-NEXT: bnez a0, .LBB22_25 +; RV64-NEXT: # %bb.24: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 8 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_26 +; RV64-NEXT: .LBB22_25: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_18: -; RV64-NEXT: fsd ft1, 64(sp) +; RV64-NEXT: .LBB22_26: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 64(sp) +; RV64-NEXT: bnez a0, .LBB22_28 +; RV64-NEXT: # %bb.27: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 7 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_29 +; RV64-NEXT: .LBB22_28: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_20: -; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: .LBB22_29: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB22_31 +; RV64-NEXT: # %bb.30: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 6 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_32 +; RV64-NEXT: .LBB22_31: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_22: -; RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: .LBB22_32: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 48(sp) +; RV64-NEXT: bnez a0, .LBB22_34 +; RV64-NEXT: # %bb.33: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 5 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_35 +; RV64-NEXT: .LBB22_34: +; RV64-NEXT: vsetivli a1, 1, 
e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_24 -; RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_24: -; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: .LBB22_35: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB22_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 4 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_38 +; RV64-NEXT: .LBB22_37: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_26: -; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: .LBB22_38: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 32(sp) +; RV64-NEXT: bnez a0, .LBB22_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 3 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_41 +; RV64-NEXT: .LBB22_40: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_28: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: .LBB22_41: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB22_43 +; RV64-NEXT: # %bb.42: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 2 -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: vslidedown.vi v24, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_30: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB22_44 +; RV64-NEXT: .LBB22_43: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV64-NEXT: vslidedown.vi v16, v16, 1 -; RV64-NEXT: vfmv.f.s ft0, v16 +; RV64-NEXT: 
vslidedown.vi v24, v8, 2 +; RV64-NEXT: .LBB22_44: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB22_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v8, v16, 1 +; RV64-NEXT: j .LBB22_47 +; RV64-NEXT: .LBB22_46: +; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v8 -; RV64-NEXT: bnez a0, .LBB22_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_32: -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: .LBB22_47: +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -256 @@ -3403,164 +3721,181 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -128 ; RV32-NEXT: feq.d a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v16 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB23_2 +; RV32-NEXT: bnez a0, .LBB23_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v16 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB23_4 ; RV32-NEXT: .LBB23_2: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 15 +; RV32-NEXT: j .LBB23_5 +; RV32-NEXT: .LBB23_3: +; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB23_2 +; RV32-NEXT: .LBB23_4: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 15 +; RV32-NEXT: .LBB23_5: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_4: -; RV32-NEXT: fsd ft1, 120(sp) +; RV32-NEXT: fsd ft0, 120(sp) +; RV32-NEXT: bnez a0, .LBB23_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: 
vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 14 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_8 +; RV32-NEXT: .LBB23_7: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_6: -; RV32-NEXT: fsd ft1, 112(sp) +; RV32-NEXT: .LBB23_8: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 112(sp) +; RV32-NEXT: bnez a0, .LBB23_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 13 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_11 +; RV32-NEXT: .LBB23_10: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_8: -; RV32-NEXT: fsd ft1, 104(sp) +; RV32-NEXT: .LBB23_11: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 104(sp) +; RV32-NEXT: bnez a0, .LBB23_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 12 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_14 +; RV32-NEXT: .LBB23_13: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 12 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_10: -; RV32-NEXT: fsd ft1, 96(sp) +; RV32-NEXT: .LBB23_14: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 96(sp) +; RV32-NEXT: bnez a0, .LBB23_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 11 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_17 +; RV32-NEXT: .LBB23_16: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_12 -; RV32-NEXT: # 
%bb.11: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_12: -; RV32-NEXT: fsd ft1, 88(sp) +; RV32-NEXT: .LBB23_17: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 88(sp) +; RV32-NEXT: bnez a0, .LBB23_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 10 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_20 +; RV32-NEXT: .LBB23_19: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_14: -; RV32-NEXT: fsd ft1, 80(sp) +; RV32-NEXT: .LBB23_20: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 80(sp) +; RV32-NEXT: bnez a0, .LBB23_22 +; RV32-NEXT: # %bb.21: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 9 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_23 +; RV32-NEXT: .LBB23_22: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_16: -; RV32-NEXT: fsd ft1, 72(sp) +; RV32-NEXT: .LBB23_23: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 72(sp) +; RV32-NEXT: bnez a0, .LBB23_25 +; RV32-NEXT: # %bb.24: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 8 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_26 +; RV32-NEXT: .LBB23_25: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_18: -; RV32-NEXT: fsd ft1, 64(sp) +; RV32-NEXT: .LBB23_26: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 64(sp) +; RV32-NEXT: bnez a0, .LBB23_28 +; RV32-NEXT: # %bb.27: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, 
v16, 7 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_29 +; RV32-NEXT: .LBB23_28: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_20: -; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: .LBB23_29: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 56(sp) +; RV32-NEXT: bnez a0, .LBB23_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 6 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_32 +; RV32-NEXT: .LBB23_31: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_22: -; RV32-NEXT: fsd ft1, 48(sp) +; RV32-NEXT: .LBB23_32: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 48(sp) +; RV32-NEXT: bnez a0, .LBB23_34 +; RV32-NEXT: # %bb.33: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 5 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_35 +; RV32-NEXT: .LBB23_34: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_24: -; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: .LBB23_35: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB23_37 +; RV32-NEXT: # %bb.36: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 4 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_38 +; RV32-NEXT: .LBB23_37: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_26: 
-; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: .LBB23_38: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 32(sp) +; RV32-NEXT: bnez a0, .LBB23_40 +; RV32-NEXT: # %bb.39: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 3 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_41 +; RV32-NEXT: .LBB23_40: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_28: -; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: .LBB23_41: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB23_43 +; RV32-NEXT: # %bb.42: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 2 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_30 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_30: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: j .LBB23_44 +; RV32-NEXT: .LBB23_43: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v16, v16, 1 -; RV32-NEXT: vfmv.f.s ft0, v16 +; RV32-NEXT: vslidedown.vi v24, v8, 2 +; RV32-NEXT: .LBB23_44: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB23_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v16, 1 +; RV32-NEXT: j .LBB23_47 +; RV32-NEXT: .LBB23_46: +; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v8 -; RV32-NEXT: bnez a0, .LBB23_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_32: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: .LBB23_47: +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -256 @@ -3581,164 
+3916,181 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -128 ; RV64-NEXT: feq.d a0, fa0, fa1 -; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v16 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB23_2 +; RV64-NEXT: bnez a0, .LBB23_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v16 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB23_4 ; RV64-NEXT: .LBB23_2: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v8, 15 +; RV64-NEXT: j .LBB23_5 +; RV64-NEXT: .LBB23_3: +; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB23_2 +; RV64-NEXT: .LBB23_4: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 15 +; RV64-NEXT: .LBB23_5: ; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: vslidedown.vi v24, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_4: -; RV64-NEXT: fsd ft1, 120(sp) +; RV64-NEXT: fsd ft0, 120(sp) +; RV64-NEXT: bnez a0, .LBB23_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 14 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_8 +; RV64-NEXT: .LBB23_7: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_6: -; RV64-NEXT: fsd ft1, 112(sp) +; RV64-NEXT: .LBB23_8: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 112(sp) +; RV64-NEXT: bnez a0, .LBB23_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 13 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_11 +; RV64-NEXT: .LBB23_10: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; 
RV64-NEXT: vslidedown.vi v24, v8, 13 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_8: -; RV64-NEXT: fsd ft1, 104(sp) +; RV64-NEXT: .LBB23_11: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 104(sp) +; RV64-NEXT: bnez a0, .LBB23_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 12 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_14 +; RV64-NEXT: .LBB23_13: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 12 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_10: -; RV64-NEXT: fsd ft1, 96(sp) +; RV64-NEXT: .LBB23_14: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 96(sp) +; RV64-NEXT: bnez a0, .LBB23_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 11 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_17 +; RV64-NEXT: .LBB23_16: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_12: -; RV64-NEXT: fsd ft1, 88(sp) +; RV64-NEXT: .LBB23_17: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 88(sp) +; RV64-NEXT: bnez a0, .LBB23_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 10 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_20 +; RV64-NEXT: .LBB23_19: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_14: -; RV64-NEXT: fsd ft1, 80(sp) +; RV64-NEXT: .LBB23_20: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 80(sp) +; 
RV64-NEXT: bnez a0, .LBB23_22 +; RV64-NEXT: # %bb.21: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 9 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_23 +; RV64-NEXT: .LBB23_22: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_16: -; RV64-NEXT: fsd ft1, 72(sp) +; RV64-NEXT: .LBB23_23: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 72(sp) +; RV64-NEXT: bnez a0, .LBB23_25 +; RV64-NEXT: # %bb.24: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 8 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_26 +; RV64-NEXT: .LBB23_25: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_18: -; RV64-NEXT: fsd ft1, 64(sp) +; RV64-NEXT: .LBB23_26: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 64(sp) +; RV64-NEXT: bnez a0, .LBB23_28 +; RV64-NEXT: # %bb.27: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 7 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_29 +; RV64-NEXT: .LBB23_28: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_20: -; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: .LBB23_29: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB23_31 +; RV64-NEXT: # %bb.30: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 6 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_32 +; RV64-NEXT: .LBB23_31: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 6 -; RV64-NEXT: 
vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_22: -; RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: .LBB23_32: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 48(sp) +; RV64-NEXT: bnez a0, .LBB23_34 +; RV64-NEXT: # %bb.33: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 5 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_35 +; RV64-NEXT: .LBB23_34: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_24 -; RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_24: -; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: .LBB23_35: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB23_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 4 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_38 +; RV64-NEXT: .LBB23_37: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_26: -; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: .LBB23_38: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 32(sp) +; RV64-NEXT: bnez a0, .LBB23_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 3 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_41 +; RV64-NEXT: .LBB23_40: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_28: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: .LBB23_41: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB23_43 +; RV64-NEXT: # %bb.42: ; 
RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 2 -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: vslidedown.vi v24, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_30: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB23_44 +; RV64-NEXT: .LBB23_43: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV64-NEXT: vslidedown.vi v16, v16, 1 -; RV64-NEXT: vfmv.f.s ft0, v16 +; RV64-NEXT: vslidedown.vi v24, v8, 2 +; RV64-NEXT: .LBB23_44: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB23_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v8, v16, 1 +; RV64-NEXT: j .LBB23_47 +; RV64-NEXT: .LBB23_46: +; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v8 -; RV64-NEXT: bnez a0, .LBB23_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_32: -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: .LBB23_47: +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -256 diff --git a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll --- a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll @@ -15,8 +15,7 @@ ; CHECK: bb.0.entry: ; CHECK: liveins: $v8 ; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v8 - ; CHECK: dead %2:gpr = PseudoVSETIVLI 1, 88, implicit-def $vl, implicit-def $vtype - ; CHECK: PseudoVSE64_V_M1 [[COPY]], %stack.0.a, 1, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoVSE64_V_M1 [[COPY]], %stack.0.a, 1, 6 ; CHECK: [[LD:%[0-9]+]]:gpr = LD %stack.0.a, 0 :: (dereferenceable load 8 from %ir.a) ; CHECK: $x10 = COPY [[LD]] ; CHECK: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir 
b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir --- a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir +++ b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir @@ -15,20 +15,21 @@ liveins: $v0, $v1, $v2, $v3 ; CHECK-LABEL: name: mask_reg_alloc ; CHECK: liveins: $v0, $v1, $v2, $v3 - ; CHECK: renamable $v25 = PseudoVMERGE_VIM_M1 killed renamable $v2, 1, killed renamable $v0, $noreg, -1, implicit $vl, implicit $vtype + ; CHECK: dead renamable $x10 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype + ; CHECK: renamable $v25 = PseudoVMERGE_VIM_M1 killed renamable $v2, 1, killed renamable $v0, 1, 3, implicit $vl, implicit $vtype ; CHECK: renamable $v0 = COPY killed renamable $v1 - ; CHECK: renamable $v26 = PseudoVMERGE_VIM_M1 killed renamable $v3, 1, killed renamable $v0, $noreg, -1, implicit $vl, implicit $vtype - ; CHECK: renamable $v0 = PseudoVADD_VV_M1 killed renamable $v25, killed renamable $v26, $noreg, -1, implicit $vl, implicit $vtype + ; CHECK: renamable $v26 = PseudoVMERGE_VIM_M1 killed renamable $v3, 1, killed renamable $v0, 1, 3, implicit $vl, implicit $vtype + ; CHECK: renamable $v0 = PseudoVADD_VV_M1 killed renamable $v25, killed renamable $v26, 1, 3, implicit $vl, implicit $vtype ; CHECK: PseudoRET implicit $v0 %0:vr = COPY $v0 %1:vr = COPY $v1 %2:vr = COPY $v2 %3:vr = COPY $v3 %4:vmv0 = COPY %0 - %5:vrnov0 = PseudoVMERGE_VIM_M1 killed %2, 1, %4, $noreg, -1, implicit $vl, implicit $vtype + %5:vrnov0 = PseudoVMERGE_VIM_M1 killed %2, 1, %4, 1, 3 %6:vmv0 = COPY %1 - %7:vrnov0 = PseudoVMERGE_VIM_M1 killed %3, 1, %6, $noreg, -1, implicit $vl, implicit $vtype - %8:vr = PseudoVADD_VV_M1 killed %5, killed %7, $noreg, -1, implicit $vl, implicit $vtype + %7:vrnov0 = PseudoVMERGE_VIM_M1 killed %3, 1, %6, 1, 3 + %8:vr = PseudoVADD_VV_M1 killed %5, killed %7, 1, 3 $v0 = COPY %8 PseudoRET implicit $v0 ... 
diff --git a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir --- a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir +++ b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir @@ -52,8 +52,7 @@ ; CHECK: $v0 = COPY [[COPY]] ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK: [[COPY2:%[0-9]+]]:vrm8nov0 = COPY [[DEF]] - ; CHECK: dead %5:gpr = PseudoVSETVLI $x0, 91, implicit-def $vl, implicit-def $vtype - ; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $noreg, 6, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8) + ; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $x0, 6 :: (load 64 from %ir.a, align 8) ; CHECK: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]] ; CHECK: PseudoRET implicit $v8m8 %1:vr = COPY $v0 @@ -61,7 +60,7 @@ $v0 = COPY %1 %3:vrm8 = IMPLICIT_DEF %4:vrm8nov0 = COPY %3 - %2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 6, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8) + %2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 6 :: (load 64 from %ir.a, align 8) $v8m8 = COPY %2 PseudoRET implicit $v8m8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -13,7 +13,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -28,7 +27,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call half 
@llvm.vector.reduce.fadd.nxv1f16(half %s, %v) @@ -44,7 +42,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.h fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -59,7 +56,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fadd.nxv2f16(half %s, %v) @@ -102,7 +98,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -117,7 +112,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) @@ -160,7 +154,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.s fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -175,7 +168,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %v) @@ -218,7 +210,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.d fa0, fa0, ft0 ; 
CHECK-NEXT: ret @@ -233,7 +224,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %v) @@ -249,7 +239,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fadd.d fa0, fa0, ft0 ; CHECK-NEXT: ret @@ -264,7 +253,6 @@ ; CHECK-NEXT: vfmv.v.f v25, fa0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfredosum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %v) @@ -282,7 +270,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmin.nxv1f16( %v) @@ -298,7 +285,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16( %v) @@ -314,7 +300,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16( %v) @@ -332,7 +317,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; 
CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmin.nxv2f16( %v) @@ -368,7 +352,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmin.nxv64f16( %v) @@ -386,7 +369,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmin.nxv1f32( %v) @@ -402,7 +384,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32( %v) @@ -418,7 +399,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32( %v) @@ -452,7 +432,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmin.nxv4f32( %v) @@ -472,7 +451,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmin.nxv32f32( %v) @@ -534,7 +512,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, 
zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmin.nxv2f64( %v) @@ -552,7 +529,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmin.nxv4f64( %v) @@ -572,7 +548,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu ; CHECK-NEXT: vfredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmin.nxv16f64( %v) @@ -590,7 +565,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmax.nxv1f16( %v) @@ -606,7 +580,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16( %v) @@ -622,7 +595,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16( %v) @@ -640,7 +612,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmax.nxv2f16( %v) @@ -676,7 +647,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e16,m8,ta,mu ; CHECK-NEXT: 
vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call half @llvm.vector.reduce.fmax.nxv64f16( %v) @@ -694,7 +664,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmax.nxv1f32( %v) @@ -710,7 +679,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32( %v) @@ -726,7 +694,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32( %v) @@ -760,7 +727,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmax.nxv4f32( %v) @@ -780,7 +746,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e32,m8,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call float @llvm.vector.reduce.fmax.nxv32f32( %v) @@ -842,7 +807,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmax.nxv2f64( %v) @@ -860,7 +824,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; 
CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmax.nxv4f64( %v) @@ -880,7 +843,6 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu ; CHECK-NEXT: vfredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s fa0, v25 ; CHECK-NEXT: ret %red = call double @llvm.vector.reduce.fmax.nxv16f64( %v) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll @@ -10,7 +10,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.add.nxv1i8( %v) @@ -26,7 +25,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umax.nxv1i8( %v) @@ -43,7 +41,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smax.nxv1i8( %v) @@ -59,7 +56,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umin.nxv1i8( %v) @@ -76,7 +72,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredmin.vs v25, 
v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smin.nxv1i8( %v) @@ -92,7 +87,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.and.nxv1i8( %v) @@ -108,7 +102,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.or.nxv1i8( %v) @@ -124,7 +117,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.xor.nxv1i8( %v) @@ -140,7 +132,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.add.nxv2i8( %v) @@ -156,7 +147,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umax.nxv2i8( %v) @@ -173,7 +163,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smax.nxv2i8( %v) @@ -189,7 +178,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, 
e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umin.nxv2i8( %v) @@ -206,7 +194,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smin.nxv2i8( %v) @@ -222,7 +209,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.and.nxv2i8( %v) @@ -238,7 +224,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.or.nxv2i8( %v) @@ -254,7 +239,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.xor.nxv2i8( %v) @@ -270,7 +254,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.add.nxv4i8( %v) @@ -286,7 +269,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umax.nxv4i8( %v) @@ -303,7 +285,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; 
CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smax.nxv4i8( %v) @@ -319,7 +300,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umin.nxv4i8( %v) @@ -336,7 +316,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smin.nxv4i8( %v) @@ -352,7 +331,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.and.nxv4i8( %v) @@ -368,7 +346,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.or.nxv4i8( %v) @@ -384,7 +361,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.xor.nxv4i8( %v) @@ -400,7 +376,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.add.nxv1i16( %v) @@ -416,7 +391,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 
@llvm.vector.reduce.umax.nxv1i16( %v) @@ -433,7 +407,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smax.nxv1i16( %v) @@ -449,7 +422,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umin.nxv1i16( %v) @@ -467,7 +439,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smin.nxv1i16( %v) @@ -483,7 +454,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.and.nxv1i16( %v) @@ -499,7 +469,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.or.nxv1i16( %v) @@ -515,7 +484,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.xor.nxv1i16( %v) @@ -531,7 +499,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 
@llvm.vector.reduce.add.nxv2i16( %v) @@ -547,7 +514,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umax.nxv2i16( %v) @@ -564,7 +530,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smax.nxv2i16( %v) @@ -580,7 +545,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umin.nxv2i16( %v) @@ -598,7 +562,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smin.nxv2i16( %v) @@ -614,7 +577,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.and.nxv2i16( %v) @@ -630,7 +592,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.or.nxv2i16( %v) @@ -646,7 +607,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 
@llvm.vector.reduce.xor.nxv2i16( %v) @@ -777,7 +737,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.add.nxv1i32( %v) @@ -793,7 +752,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umax.nxv1i32( %v) @@ -810,7 +768,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smax.nxv1i32( %v) @@ -826,7 +783,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umin.nxv1i32( %v) @@ -844,7 +800,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smin.nxv1i32( %v) @@ -860,7 +815,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.and.nxv1i32( %v) @@ -876,7 +830,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 
@llvm.vector.reduce.or.nxv1i32( %v) @@ -892,7 +845,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.xor.nxv1i32( %v) @@ -1023,7 +975,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.add.nxv4i32( %v) @@ -1039,7 +990,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umax.nxv4i32( %v) @@ -1056,7 +1006,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smax.nxv4i32( %v) @@ -1072,7 +1021,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umin.nxv4i32( %v) @@ -1090,7 +1038,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smin.nxv4i32( %v) @@ -1106,7 +1053,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 
@llvm.vector.reduce.and.nxv4i32( %v) @@ -1122,7 +1068,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.or.nxv4i32( %v) @@ -1138,7 +1083,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.xor.nxv4i32( %v) @@ -1314,7 +1258,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1334,7 +1277,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1360,7 +1302,6 @@ ; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1381,7 +1322,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1409,7 +1349,6 @@ ; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: 
addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1430,7 +1369,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1450,7 +1388,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1470,7 +1407,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1490,7 +1426,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1510,7 +1445,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1536,7 +1470,6 @@ ; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1557,7 +1490,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s 
a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1585,7 +1517,6 @@ ; CHECK-NEXT: vlse64.v v25, (a0), zero ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1606,7 +1537,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1626,7 +1556,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu @@ -1646,7 +1575,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: addi a1, zero, 32 ; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll @@ -10,7 +10,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.add.nxv1i8( %v) @@ -26,7 +25,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 
@llvm.vector.reduce.umax.nxv1i8( %v) @@ -43,7 +41,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smax.nxv1i8( %v) @@ -59,7 +56,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umin.nxv1i8( %v) @@ -76,7 +72,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smin.nxv1i8( %v) @@ -92,7 +87,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.and.nxv1i8( %v) @@ -108,7 +102,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.or.nxv1i8( %v) @@ -124,7 +117,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.xor.nxv1i8( %v) @@ -140,7 +132,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.add.nxv2i8( %v) @@ -156,7 +147,6 
@@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umax.nxv2i8( %v) @@ -173,7 +163,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smax.nxv2i8( %v) @@ -189,7 +178,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umin.nxv2i8( %v) @@ -206,7 +194,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smin.nxv2i8( %v) @@ -222,7 +209,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.and.nxv2i8( %v) @@ -238,7 +224,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.or.nxv2i8( %v) @@ -254,7 +239,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.xor.nxv2i8( %v) @@ -270,7 +254,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: 
vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.add.nxv4i8( %v) @@ -286,7 +269,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umax.nxv4i8( %v) @@ -303,7 +285,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smax.nxv4i8( %v) @@ -319,7 +300,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.umin.nxv4i8( %v) @@ -336,7 +316,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.smin.nxv4i8( %v) @@ -352,7 +331,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.and.nxv4i8( %v) @@ -368,7 +346,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.or.nxv4i8( %v) @@ -384,7 +361,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu ; CHECK-NEXT: 
vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i8 @llvm.vector.reduce.xor.nxv4i8( %v) @@ -400,7 +376,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.add.nxv1i16( %v) @@ -416,7 +391,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umax.nxv1i16( %v) @@ -433,7 +407,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smax.nxv1i16( %v) @@ -449,7 +422,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umin.nxv1i16( %v) @@ -467,7 +439,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smin.nxv1i16( %v) @@ -483,7 +454,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.and.nxv1i16( %v) @@ -499,7 +469,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, 
v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.or.nxv1i16( %v) @@ -515,7 +484,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf4,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.xor.nxv1i16( %v) @@ -531,7 +499,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.add.nxv2i16( %v) @@ -547,7 +514,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umax.nxv2i16( %v) @@ -564,7 +530,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smax.nxv2i16( %v) @@ -580,7 +545,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.umin.nxv2i16( %v) @@ -598,7 +562,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.smin.nxv2i16( %v) @@ -614,7 +577,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; 
CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.and.nxv2i16( %v) @@ -630,7 +592,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.or.nxv2i16( %v) @@ -646,7 +607,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e16,mf2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i16 @llvm.vector.reduce.xor.nxv2i16( %v) @@ -777,7 +737,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.add.nxv1i32( %v) @@ -793,7 +752,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umax.nxv1i32( %v) @@ -810,7 +768,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smax.nxv1i32( %v) @@ -826,7 +783,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umin.nxv1i32( %v) @@ -844,7 +800,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli 
zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smin.nxv1i32( %v) @@ -860,7 +815,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.and.nxv1i32( %v) @@ -876,7 +830,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.or.nxv1i32( %v) @@ -892,7 +845,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,mf2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.xor.nxv1i32( %v) @@ -1023,7 +975,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.add.nxv4i32( %v) @@ -1039,7 +990,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umax.nxv4i32( %v) @@ -1056,7 +1006,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smax.nxv4i32( %v) @@ -1072,7 +1021,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, 
e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.umin.nxv4i32( %v) @@ -1090,7 +1038,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.smin.nxv4i32( %v) @@ -1106,7 +1053,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.and.nxv4i32( %v) @@ -1122,7 +1068,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.or.nxv4i32( %v) @@ -1138,7 +1083,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e32,m2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i32 @llvm.vector.reduce.xor.nxv4i32( %v) @@ -1270,7 +1214,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.add.nxv2i64( %v) @@ -1286,7 +1229,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.umax.nxv2i64( %v) @@ -1304,7 +1246,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu 
; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.smax.nxv2i64( %v) @@ -1320,7 +1261,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.umin.nxv2i64( %v) @@ -1338,7 +1278,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.smin.nxv2i64( %v) @@ -1354,7 +1293,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.and.nxv2i64( %v) @@ -1370,7 +1308,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.or.nxv2i64( %v) @@ -1386,7 +1323,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.xor.nxv2i64( %v) @@ -1402,7 +1338,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredsum.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.add.nxv4i64( %v) @@ -1418,7 +1353,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredmaxu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; 
CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.umax.nxv4i64( %v) @@ -1436,7 +1370,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredmax.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.smax.nxv4i64( %v) @@ -1452,7 +1385,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredminu.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.umin.nxv4i64( %v) @@ -1470,7 +1402,6 @@ ; CHECK-NEXT: vmv.v.x v25, a0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredmin.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.smin.nxv4i64( %v) @@ -1486,7 +1417,6 @@ ; CHECK-NEXT: vmv.v.i v25, -1 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredand.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.and.nxv4i64( %v) @@ -1502,7 +1432,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.or.nxv4i64( %v) @@ -1518,7 +1447,6 @@ ; CHECK-NEXT: vmv.v.i v25, 0 ; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu ; CHECK-NEXT: vredxor.vs v25, v8, v25 -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vmv.x.s a0, v25 ; CHECK-NEXT: ret %red = call i64 @llvm.vector.reduce.xor.nxv4i64( %v) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir new file mode 100644 --- /dev/null +++ 
b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir @@ -0,0 +1,354 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \ +# RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s + +--- | + ; ModuleID = 'test.ll' + source_filename = "test.ll" + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" + target triple = "riscv64" + + ; Function Attrs: nounwind + define @add( %0, %1, i64 %2) #0 { + entry: + %a = call @llvm.riscv.vadd.nxv1i64.nxv1i64.i64( %0, %1, i64 %2) + ret %a + } + + ; Function Attrs: nounwind + define @load_add(* %0, %1, i64 %2) #0 { + entry: + %a = call @llvm.riscv.vle.nxv1i64.i64(* %0, i64 %2) + %b = call @llvm.riscv.vadd.nxv1i64.nxv1i64.i64( %a, %1, i64 %2) + ret %b + } + + ; Function Attrs: nounwind + define @load_zext(* %0, i64 %1) #0 { + entry: + %a = call @llvm.riscv.vle.nxv1i32.i64(* %0, i64 %1) + %b = call @llvm.riscv.vzext.nxv1i64.nxv1i32.i64( %a, i64 %1) + ret %b + } + + ; Function Attrs: nounwind readnone + declare i64 @llvm.riscv.vmv.x.s.nxv1i64() #1 + + ; Function Attrs: nounwind + define i64 @vmv_x_s( %0) #0 { + entry: + %a = call i64 @llvm.riscv.vmv.x.s.nxv1i64( %0) + ret i64 %a + } + + define void @add_v2i64(<2 x i64>* %x, <2 x i64>* %y) #2 { + %a = load <2 x i64>, <2 x i64>* %x, align 16 + %b = load <2 x i64>, <2 x i64>* %y, align 16 + %c = add <2 x i64> %a, %b + store <2 x i64> %c, <2 x i64>* %x, align 16 + ret void + } + + ; Function Attrs: nofree nosync nounwind readnone willreturn + declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) #3 + + define i64 @vreduce_add_v2i64(<2 x i64>* %x) #2 { + %v = load <2 x i64>, <2 x i64>* %x, align 16 + %red = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %v) + ret i64 %red + } + + ; Function Attrs: nounwind + declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #0 + + ; Function Attrs: nounwind + define @vsetvli_add( %0, %1, i64 %avl) #0 { + entry: + %a = call i64 
@llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 1) + %b = call @llvm.riscv.vadd.nxv1i64.nxv1i64.i64( %0, %1, i64 %a) + ret %b + } + + ; Function Attrs: nounwind readnone + declare @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(, , i64) #1 + + ; Function Attrs: nounwind readonly + declare @llvm.riscv.vle.nxv1i64.i64(* nocapture, i64) #2 + + ; Function Attrs: nounwind readonly + declare @llvm.riscv.vle.nxv1i32.i64(* nocapture, i64) #2 + + ; Function Attrs: nounwind readnone + declare @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(, i64) #1 + + attributes #0 = { nounwind "target-features"="+experimental-v" } + attributes #1 = { nounwind readnone "target-features"="+experimental-v" } + attributes #2 = { "target-features"="+experimental-v" } + attributes #3 = { nofree nosync nounwind readnone willreturn "target-features"="+experimental-v" } + attributes #4 = { nounwind readonly "target-features"="+experimental-v" } + +... +--- +name: add +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: gpr } + - { id: 3, class: vr } +liveins: + - { reg: '$v8', virtual-reg: '%0' } + - { reg: '$v9', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8, $v9, $x10 + + ; CHECK-LABEL: name: add + ; CHECK: liveins: $v8, $v9, $x10 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9 + ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8 + ; CHECK: dead %4:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]] + ; CHECK: PseudoRET implicit $v8 + %2:gpr = COPY $x10 + %1:vr = COPY $v9 + %0:vr = COPY $v8 + %3:vr = PseudoVADD_VV_M1 %0, %1, %2, 6 + $v8 = COPY %3 + PseudoRET implicit $v8 + +... 
+--- +name: load_add +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: vr } + - { id: 2, class: gpr } + - { id: 3, class: vr } + - { id: 4, class: vr } +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$v8', virtual-reg: '%1' } + - { reg: '$x11', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10, $v8, $x11 + + ; CHECK-LABEL: name: load_add + ; CHECK: liveins: $x10, $v8, $x11 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v8 + ; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %5:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]] + ; CHECK: PseudoRET implicit $v8 + %2:gpr = COPY $x11 + %1:vr = COPY $v8 + %0:gpr = COPY $x10 + %3:vr = PseudoVLE64_V_M1 %0, %2, 6 + %4:vr = PseudoVADD_VV_M1 killed %3, %1, %2, 6 + $v8 = COPY %4 + PseudoRET implicit $v8 + +... 
+--- +name: load_zext +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: vr } + - { id: 3, class: vr } +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$x11', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: load_zext + ; CHECK: liveins: $x10, $x11 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %4:gpr = PseudoVSETVLI [[COPY]], 87, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 [[COPY1]], $noreg, 5, implicit $vl, implicit $vtype + ; CHECK: dead %5:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: early-clobber %3:vr = PseudoVZEXT_VF2_M1 killed [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: $v8 = COPY %3 + ; CHECK: PseudoRET implicit $v8 + %1:gpr = COPY $x11 + %0:gpr = COPY $x10 + %2:vr = PseudoVLE32_V_MF2 %0, %1, 5 + early-clobber %3:vr = PseudoVZEXT_VF2_M1 killed %2, %1, 6 + $v8 = COPY %3 + PseudoRET implicit $v8 + +... +--- +name: vmv_x_s +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: gpr } +liveins: + - { reg: '$v8', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8 + + ; CHECK-LABEL: name: vmv_x_s + ; CHECK: liveins: $v8 + ; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; CHECK: dead $x0 = PseudoVSETVLI killed $x0, 88, implicit-def $vl, implicit-def $vtype, implicit $vl + ; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 [[COPY]], 6, implicit $vtype + ; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]] + ; CHECK: PseudoRET implicit $x10 + %0:vr = COPY $v8 + %1:gpr = PseudoVMV_X_S_M1 %0, 6 + $x10 = COPY %1 + PseudoRET implicit $x10 + +... 
+--- +name: add_v2i64 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: vr } + - { id: 3, class: vr } + - { id: 4, class: vr } +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$x11', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $x10, $x11 + + ; CHECK-LABEL: name: add_v2i64 + ; CHECK: liveins: $x10, $x11 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %5:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.x) + ; CHECK: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.y) + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], killed [[PseudoVLE64_V_M1_1]], 2, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoVSE64_V_M1 killed [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (store 16 into %ir.x) + ; CHECK: PseudoRET + %1:gpr = COPY $x11 + %0:gpr = COPY $x10 + %2:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load 16 from %ir.x) + %3:vr = PseudoVLE64_V_M1 %1, 2, 6 :: (load 16 from %ir.y) + %4:vr = PseudoVADD_VV_M1 killed %2, killed %3, 2, 6 + PseudoVSE64_V_M1 killed %4, %0, 2, 6 :: (store 16 into %ir.x) + PseudoRET + +... 
+--- +name: vreduce_add_v2i64 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: vr } + - { id: 2, class: vr } + - { id: 3, class: vr } + - { id: 4, class: vr } + - { id: 5, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vreduce_add_v2i64 + ; CHECK: liveins: $x10 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %6:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.x) + ; CHECK: dead %7:gpr = PseudoVSETVLI $x0, 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 0, $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; CHECK: dead %8:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVREDSUM_VS_M1_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1 [[DEF]], killed [[PseudoVLE64_V_M1_]], killed [[PseudoVMV_V_I_M1_]], 2, 6, implicit $vl, implicit $vtype + ; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 killed [[PseudoVREDSUM_VS_M1_]], 6, implicit $vtype + ; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]] + ; CHECK: PseudoRET implicit $x10 + %0:gpr = COPY $x10 + %1:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load 16 from %ir.x) + %2:vr = PseudoVMV_V_I_M1 0, $x0, 6 + %4:vr = IMPLICIT_DEF + %3:vr = PseudoVREDSUM_VS_M1 %4, killed %1, killed %2, 2, 6 + %5:gpr = PseudoVMV_X_S_M1 killed %3, 6 + $x10 = COPY %5 + PseudoRET implicit $x10 + +... 
+--- +name: vsetvli_add +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: vr } +liveins: + - { reg: '$v8', virtual-reg: '%0' } + - { reg: '$v9', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8, $v9, $x10 + + ; CHECK-LABEL: name: vsetvli_add + ; CHECK: liveins: $v8, $v9, $x10 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9 + ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8 + ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]] + ; CHECK: PseudoRET implicit $v8 + %2:gpr = COPY $x10 + %1:vr = COPY $v9 + %0:vr = COPY $v8 + %3:gpr = PseudoVSETVLI %2, 88, implicit-def dead $vl, implicit-def dead $vtype + %4:vr = PseudoVADD_VV_M1 %0, %1, killed %3, 6 + $v8 = COPY %4 + PseudoRET implicit $v8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=riscv64 -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s +# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s --- | target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" @@ -41,7 +41,7 @@ ; CHECK: PseudoRET %0:gpr = COPY $x10 %1:gpr = COPY $x11 - $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 %0, %1, 6, implicit $vl, implicit $vtype + $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 %0, %1, 6 PseudoVSPILL7_M1 killed renamable $v0_v1_v2_v3_v4_v5_v6, %stack.0, $x0 renamable $v7_v8_v9_v10_v11_v12_v13 = PseudoVRELOAD7_M1 %stack.0, $x0 VS1R_V killed $v8, %0:gpr