diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -21,10 +21,10 @@ add_llvm_target(RISCVCodeGen RISCVAsmPrinter.cpp RISCVCallLowering.cpp - RISCVCleanupVSETVLI.cpp RISCVExpandAtomicPseudoInsts.cpp RISCVExpandPseudoInsts.cpp RISCVFrameLowering.cpp + RISCVInsertVSETVLI.cpp RISCVInstrInfo.cpp RISCVInstructionSelector.cpp RISCVISelDAGToDAG.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -46,8 +46,8 @@ FunctionPass *createRISCVExpandAtomicPseudoPass(); void initializeRISCVExpandAtomicPseudoPass(PassRegistry &); -FunctionPass *createRISCVCleanupVSETVLIPass(); -void initializeRISCVCleanupVSETVLIPass(PassRegistry &); +FunctionPass *createRISCVInsertVSETVLIPass(); +void initializeRISCVInsertVSETVLIPass(PassRegistry &); InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, RISCVSubtarget &, diff --git a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp deleted file mode 100644 --- a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp +++ /dev/null @@ -1,163 +0,0 @@ -//===- RISCVCleanupVSETVLI.cpp - Cleanup unneeded VSETVLI instructions ----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements a function pass that removes duplicate vsetvli -// instructions within a basic block. 
-// -//===----------------------------------------------------------------------===// - -#include "RISCV.h" -#include "RISCVSubtarget.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -using namespace llvm; - -#define DEBUG_TYPE "riscv-cleanup-vsetvli" -#define RISCV_CLEANUP_VSETVLI_NAME "RISCV Cleanup VSETVLI pass" - -namespace { - -class RISCVCleanupVSETVLI : public MachineFunctionPass { -public: - static char ID; - - RISCVCleanupVSETVLI() : MachineFunctionPass(ID) { - initializeRISCVCleanupVSETVLIPass(*PassRegistry::getPassRegistry()); - } - bool runOnMachineFunction(MachineFunction &MF) override; - bool runOnMachineBasicBlock(MachineBasicBlock &MBB); - - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::IsSSA); - } - - // This pass modifies the program, but does not modify the CFG - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - StringRef getPassName() const override { return RISCV_CLEANUP_VSETVLI_NAME; } -}; - -} // end anonymous namespace - -char RISCVCleanupVSETVLI::ID = 0; - -INITIALIZE_PASS(RISCVCleanupVSETVLI, DEBUG_TYPE, - RISCV_CLEANUP_VSETVLI_NAME, false, false) - -static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) { - // If we don't have a previous VSET{I}VLI or the VL output isn't dead, we - // can't remove this VSETVLI. - if (!PrevVSETVLI || !MI.getOperand(0).isDead()) - return false; - - // Does this VSET{I}VLI use the same VTYPE immediate. - int64_t PrevVTYPEImm = PrevVSETVLI->getOperand(2).getImm(); - int64_t VTYPEImm = MI.getOperand(2).getImm(); - if (PrevVTYPEImm != VTYPEImm) - return false; - - if (MI.getOpcode() == RISCV::PseudoVSETIVLI) { - // If the previous opcode wasn't vsetivli we can't compare them. - if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETIVLI) - return false; - - // For VSETIVLI, we can just compare the immediates. 
- return PrevVSETVLI->getOperand(1).getImm() == MI.getOperand(1).getImm(); - } - - assert(MI.getOpcode() == RISCV::PseudoVSETVLI); - Register AVLReg = MI.getOperand(1).getReg(); - Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg(); - - // If this VSETVLI isn't changing VL, it is redundant. - if (AVLReg == RISCV::X0 && MI.getOperand(0).getReg() == RISCV::X0) - return true; - - // If the previous VSET{I}VLI's output (which isn't X0) is fed into this - // VSETVLI, this one isn't changing VL so is redundant. - // Only perform this on virtual registers to avoid the complexity of having - // to work out if the physical register was clobbered somewhere in between. - if (AVLReg.isVirtual() && AVLReg == PrevOutVL) - return true; - - // If the previous opcode isn't vsetvli we can't do any more comparison. - if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETVLI) - return false; - - // Does this VSETVLI use the same AVL register? - if (AVLReg != PrevVSETVLI->getOperand(1).getReg()) - return false; - - // If the AVLReg is X0 we must be setting VL to VLMAX. Keeping VL unchanged - // was handled above. - if (AVLReg == RISCV::X0) { - // This instruction is setting VL to VLMAX, this is redundant if the - // previous VSETVLI was also setting VL to VLMAX. But it is not redundant - // if they were setting it to any other value or leaving VL unchanged. - return PrevOutVL != RISCV::X0; - } - - // This vsetvli is redundant. - return true; -} - -bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) { - bool Changed = false; - MachineInstr *PrevVSETVLI = nullptr; - - for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) { - MachineInstr &MI = *MII++; - - if (MI.getOpcode() != RISCV::PseudoVSETVLI && - MI.getOpcode() != RISCV::PseudoVSETIVLI) { - if (PrevVSETVLI && - (MI.isCall() || MI.modifiesRegister(RISCV::VL) || - MI.modifiesRegister(RISCV::VTYPE))) { - // Old VL/VTYPE is overwritten. 
- PrevVSETVLI = nullptr; - } - continue; - } - - if (isRedundantVSETVLI(MI, PrevVSETVLI)) { - // This VSETVLI is redundant, remove it. - MI.eraseFromParent(); - Changed = true; - } else { - // Otherwise update VSET{I}VLI for the next iteration. - PrevVSETVLI = &MI; - } - } - - return Changed; -} - -bool RISCVCleanupVSETVLI::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(MF.getFunction())) - return false; - - // Skip if the vector extension is not enabled. - const RISCVSubtarget &ST = MF.getSubtarget(); - if (!ST.hasStdExtV()) - return false; - - bool Changed = false; - - for (MachineBasicBlock &MBB : MF) - Changed |= runOnMachineBasicBlock(MBB); - - return Changed; -} - -/// Returns an instance of the Cleanup VSETVLI pass. -FunctionPass *llvm::createRISCVCleanupVSETVLIPass() { - return new RISCVCleanupVSETVLI(); -} diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -240,7 +240,8 @@ bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { - assert(MBBI->getNumOperands() == 5 && "Unexpected instruction format"); + assert(MBBI->getNumExplicitOperands() == 3 && MBBI->getNumOperands() >= 5 && + "Unexpected instruction format"); DebugLoc DL = MBBI->getDebugLoc(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -6434,107 +6434,9 @@ return TailMBB; } -static MachineInstr *elideCopies(MachineInstr *MI, - const MachineRegisterInfo &MRI) { - while (true) { - if (!MI->isFullCopy()) - return MI; - if (!Register::isVirtualRegister(MI->getOperand(1).getReg())) - return nullptr; - MI = MRI.getVRegDef(MI->getOperand(1).getReg()); - if (!MI) - return nullptr; - } -} - -static MachineBasicBlock 
*addVSetVL(MachineInstr &MI, MachineBasicBlock *BB, - int VLIndex, unsigned SEWIndex, - RISCVII::VLMUL VLMul, - bool ForceTailAgnostic) { - MachineFunction &MF = *BB->getParent(); - DebugLoc DL = MI.getDebugLoc(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); - - unsigned Log2SEW = MI.getOperand(SEWIndex).getImm(); - assert(RISCVVType::isValidSEW(1 << Log2SEW) && "Unexpected SEW"); - RISCVII::VSEW ElementWidth = static_cast(Log2SEW - 3); - - MachineRegisterInfo &MRI = MF.getRegInfo(); - - auto BuildVSETVLI = [&]() { - if (VLIndex >= 0) { - Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - const MachineOperand &VLOp = MI.getOperand(VLIndex); - - // VL can be a register or an immediate. - if (VLOp.isImm()) - return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI)) - .addReg(DestReg, RegState::Define | RegState::Dead) - .addImm(VLOp.getImm()); - - Register VLReg = MI.getOperand(VLIndex).getReg(); - return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) - .addReg(DestReg, RegState::Define | RegState::Dead) - .addReg(VLReg); - } - - // With no VL operator in the pseudo, do not modify VL (rd = X0, rs1 = X0). - return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill); - }; - - MachineInstrBuilder MIB = BuildVSETVLI(); - - // Default to tail agnostic unless the destination is tied to a source. In - // that case the user would have some control over the tail values. The tail - // policy is also ignored on instructions that only update element 0 like - // vmv.s.x or reductions so use agnostic there to match the common case. - // FIXME: This is conservatively correct, but we might want to detect that - // the input is undefined. - bool TailAgnostic = true; - unsigned UseOpIdx; - if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) { - TailAgnostic = false; - // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic. 
- const MachineOperand &UseMO = MI.getOperand(UseOpIdx); - MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); - if (UseMI) { - UseMI = elideCopies(UseMI, MRI); - if (UseMI && UseMI->isImplicitDef()) - TailAgnostic = true; - } - } - - // For simplicity we reuse the vtype representation here. - MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth, - /*TailAgnostic*/ TailAgnostic, - /*MaskAgnostic*/ false)); - - // Remove (now) redundant operands from pseudo - if (VLIndex >= 0 && MI.getOperand(VLIndex).isReg()) { - MI.getOperand(VLIndex).setReg(RISCV::NoRegister); - MI.getOperand(VLIndex).setIsKill(false); - } - - return BB; -} - MachineBasicBlock * RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { - uint64_t TSFlags = MI.getDesc().TSFlags; - - if (RISCVII::hasSEWOp(TSFlags)) { - unsigned NumOperands = MI.getNumExplicitOperands(); - int VLIndex = RISCVII::hasVLOp(TSFlags) ? NumOperands - 2 : -1; - unsigned SEWIndex = NumOperands - 1; - bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags); - - RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags); - return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic); - } - switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert");
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -0,0 +1,284 @@
+//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function pass that inserts VSETVLI instructions where
+// needed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-insert-vsetvli"
+#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"
+
+namespace {
+
+/// Adds the implicit VL/VTYPE uses to RVV pseudo instructions and inserts a
+/// PseudoVSETVLI/PseudoVSETIVLI in front of each one unless the most recent
+/// VL/VTYPE update in the same basic block already provides what it needs.
+class RISCVInsertVSETVLI : public MachineFunctionPass {
+  // Set by runOnMachineFunction before any basic block is processed.
+  const TargetInstrInfo *TII = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+
+public:
+  static char ID;
+
+  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
+    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+  // This pass modifies the program, but does not modify the CFG.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
+
+private:
+  bool maybeInsertVSETVLI(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MII,
+                          const MachineInstr *&PrevVSETVLI);
+};
+
+} // end anonymous namespace
+
+char RISCVInsertVSETVLI::ID = 0;
+
+INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
+                false, false)
+
+/// Look through a chain of full COPY instructions starting at \p MI and return
+/// the first instruction that is not a full copy. Returns nullptr if a copy's
+/// source is not a virtual register or \p MRI has no definition for it.
+static MachineInstr *elideCopies(MachineInstr *MI,
+                                 const MachineRegisterInfo *MRI) {
+  while (true) {
+    if (!MI->isFullCopy())
+      return MI;
+    if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
+      return nullptr;
+    MI = MRI->getVRegDef(MI->getOperand(1).getReg());
+    if (!MI)
+      return nullptr;
+  }
+}
+
+/// Compute the VTYPE immediate needed by the RVV pseudo \p MI. LMUL is taken
+/// from \p TSFlags and SEW from the last explicit operand, which holds
+/// log2(SEW). Tail agnostic is set unless the destination is tied to a source
+/// that is not an IMPLICIT_DEF (and \p TSFlags does not force it); mask
+/// agnostic is never set.
+static unsigned computeVType(const MachineInstr &MI, uint64_t TSFlags,
+                             const MachineRegisterInfo *MRI) {
+  unsigned NumOperands = MI.getNumExplicitOperands();
+
+  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+
+  unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
+  assert(RISCVVType::isValidSEW(1 << Log2SEW) && "Unexpected SEW");
+  RISCVII::VSEW ElementWidth = static_cast<RISCVII::VSEW>(Log2SEW - 3);
+
+  // Default to tail agnostic. Only clear it when the destination is tied to
+  // a source that is not an IMPLICIT_DEF, since in that case the user has
+  // some control over the tail values. The tail policy is also ignored on
+  // instructions that only update element 0, like vmv.s.x or reductions, so
+  // ForceTailAgnostic keeps those agnostic to match the common case.
+  // FIXME: This is conservatively correct, but we might want to detect that
+  // the input is undefined.
+  bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
+  bool TailAgnostic = true;
+  unsigned UseOpIdx;
+  if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+    TailAgnostic = false;
+    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
+    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
+    if (UseMI) {
+      UseMI = elideCopies(UseMI, MRI);
+      if (UseMI && UseMI->isImplicitDef())
+        TailAgnostic = true;
+    }
+  }
+
+  return RISCVVType::encodeVTYPE(VLMul, ElementWidth,
+                                 /*TailAgnostic*/ TailAgnostic,
+                                 /*MaskAgnostic*/ false);
+}
+
+/// If the instruction at \p MII is an RVV pseudo with a SEW operand, add its
+/// implicit VL/VTYPE uses and insert a PseudoVSETVLI/PseudoVSETIVLI before it
+/// unless \p PrevVSETVLI already provides the required VL and VTYPE.
+/// \p PrevVSETVLI is cleared if its VTYPE does not match and is updated to
+/// point at any newly inserted instruction. Returns true if anything changed.
+bool RISCVInsertVSETVLI::maybeInsertVSETVLI(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator MII,
+                                            const MachineInstr *&PrevVSETVLI) {
+  MachineInstr &MI = *MII;
+
+  uint64_t TSFlags = MI.getDesc().TSFlags;
+  if (!RISCVII::hasSEWOp(TSFlags))
+    return false;
+
+  // Add VL/VTYPE implicit uses.
+  if (RISCVII::hasVLOp(TSFlags)) {
+    MI.addOperand(
+        MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false, /*isImp*/ true));
+  }
+  MI.addOperand(
+      MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false, /*isImp*/ true));
+
+  unsigned VTypeImm = computeVType(MI, TSFlags, MRI);
+
+  // Invalidate PrevVSETVLI if the vtype doesn't match.
+  if (PrevVSETVLI && PrevVSETVLI->getOperand(2).getImm() != VTypeImm)
+    PrevVSETVLI = nullptr;
+
+  DebugLoc DL = MI.getDebugLoc();
+
+  if (!RISCVII::hasVLOp(TSFlags)) {
+    // This instruction isn't changing VL. If it has the same VTYPE as the last
+    // VSETVLI we're done; MI was still modified by the implicit uses above.
+    if (PrevVSETVLI)
+      return true;
+
+    auto MIB = BuildMI(MBB, MII, DL, TII->get(RISCV::PseudoVSETVLI))
+                   .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+                   .addReg(RISCV::X0, RegState::Kill)
+                   .addImm(VTypeImm)
+                   .addReg(RISCV::VL, RegState::Implicit);
+    PrevVSETVLI = &*MIB;
+    return true;
+  }
+
+  MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2);
+
+  // First handle the VSETIVLI case.
+  if (VLOp.isImm()) {
+    int64_t Imm = VLOp.getImm();
+
+    // If we have a previous VSETIVLI, we don't need to insert one. The VTYPE
+    // was checked earlier, so just compare the immediate.
+    // MI was still modified by the implicit uses above, so report a change.
+    if (PrevVSETVLI && PrevVSETVLI->getOpcode() == RISCV::PseudoVSETIVLI &&
+        PrevVSETVLI->getOperand(1).getImm() == Imm)
+      return true;
+
+    // TODO: Use X0 as the destination.
+    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+    auto MIB = BuildMI(MBB, MII, DL, TII->get(RISCV::PseudoVSETIVLI))
+                   .addReg(DestReg, RegState::Define | RegState::Dead)
+                   .addImm(Imm)
+                   .addImm(VTypeImm);
+    PrevVSETVLI = &*MIB;
+
+    return true;
+  }
+
+  // Otherwise we expect VL to be a GPR.
+  Register AVLReg = VLOp.getReg();
+  assert(AVLReg != RISCV::NoRegister && "Unexpected AVL register");
+
+  auto needVSETVLI = [](const MachineInstr *PrevVSETVLI, Register AVLReg) {
+    if (!PrevVSETVLI)
+      return true;
+
+    Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
+    // If the previous VSET{I}VLI's output (which isn't X0) is fed into this
+    // VSETVLI, this one isn't changing VL so is redundant.
+    // Only perform this on virtual registers to avoid the complexity of having
+    // to work out if the physical register was clobbered somewhere in between.
+    if (AVLReg.isVirtual() && AVLReg == PrevOutVL)
+      return false;
+
+    // The previous VSETVLI used the same AVL register. If that register is X0
+    // (VLMAX), the previous one must also have set VL to VLMAX, i.e. its
+    // output must not be X0, which would have left VL unchanged.
+    if (PrevVSETVLI->getOpcode() == RISCV::PseudoVSETVLI &&
+        AVLReg == PrevVSETVLI->getOperand(1).getReg() &&
+        (AVLReg != RISCV::X0 || PrevOutVL != RISCV::X0))
+      return false;
+
+    return true;
+  };
+
+  if (needVSETVLI(PrevVSETVLI, AVLReg)) {
+    // TODO: Use X0 as the destination if AVLReg is not X0.
+    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+    auto MIB = BuildMI(MBB, MII, DL, TII->get(RISCV::PseudoVSETVLI))
+                   .addReg(DestReg, RegState::Define | RegState::Dead)
+                   .addReg(AVLReg)
+                   .addImm(VTypeImm);
+    PrevVSETVLI = &*MIB;
+  }
+
+  // Clear the GPR, we don't need it anymore.
+  VLOp.setReg(RISCV::NoRegister);
+  VLOp.setIsKill(false);
+
+  return true;
+}
+
+/// Process the instructions of \p MBB in order, remembering the most recent
+/// VL/VTYPE update so that redundant VSETVLIs are not inserted. Returns true
+/// if the block was modified.
+bool RISCVInsertVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+  bool Changed = false;
+  const MachineInstr *PrevVSETVLI = nullptr;
+
+  for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE; ++MII) {
+    MachineInstr &MI = *MII;
+
+    // If this is an explicit VSETVLI/VSETIVLI instruction, cache it as our
+    // previous VL/VTYPE update.
+    if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+        MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+      PrevVSETVLI = &MI;
+      continue;
+    }
+
+    Changed |= maybeInsertVSETVLI(MBB, MII, PrevVSETVLI);
+
+    if (PrevVSETVLI && (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
+                        MI.modifiesRegister(RISCV::VTYPE))) {
+      // Old VL/VTYPE is overwritten.
+      PrevVSETVLI = nullptr;
+    }
+  }
+
+  return Changed;
+}
+
+bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
+  // Skip if the vector extension is not enabled.
+  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+  if (!ST.hasStdExtV())
+    return false;
+
+  // Cache the per-function state used while processing each basic block.
+  TII = ST.getInstrInfo();
+  MRI = &MF.getRegInfo();
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : MF)
+    Changed |= runOnMachineBasicBlock(MBB);
+
+  return Changed;
+}
+
+/// Returns an instance of the Insert VSETVLI pass.
+FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
+  return new RISCVInsertVSETVLI();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -619,8 +619,6 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -637,9 +635,7 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = "$rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -654,8 +650,6 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -672,9 +666,7 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = "$rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -690,8 +682,6 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -710,9 +700,7 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $merge", "$rd = $merge"); - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let
HasMergeOp = 1; @@ -727,8 +715,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -743,8 +729,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -758,8 +742,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -774,8 +756,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -790,8 +770,6 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -804,8 +782,6 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -819,9 +795,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints ="$rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -836,8 +810,6 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; // BaseInstr is not used in RISCVExpandPseudoInsts pass. 
@@ -853,9 +825,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Constraint; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -870,9 +840,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -887,8 +855,6 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -906,9 +872,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = "@earlyclobber $rd, $rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -925,9 +889,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Constraint; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -943,8 +905,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -960,8 +920,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -979,9 +937,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1001,9 +957,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret; - let Uses = 
[VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1025,9 +979,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Constraint; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 0; @@ -1047,9 +999,7 @@ let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1068,9 +1018,7 @@ let mayLoad = 1; let mayStore = 1; let hasSideEffects = 1; - let usesCustomInserter = 1; let Constraints = "$vd_wd = $vd"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1088,9 +1036,7 @@ let mayLoad = 1; let mayStore = 1; let hasSideEffects = 1; - let usesCustomInserter = 1; let Constraints = "$vd_wd = $vd"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -1131,8 +1077,6 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1148,9 +1092,7 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = "$rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1166,8 +1108,6 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1183,9 +1123,7 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; let Constraints = "$rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1201,11 +1139,9 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; // For vector indexed 
segment loads, the destination vector register groups // cannot overlap the source vector register group let Constraints = "@earlyclobber $rd"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1222,11 +1158,9 @@ let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; - let usesCustomInserter = 1; // For vector indexed segment loads, the destination vector register groups // cannot overlap the source vector register group let Constraints = "@earlyclobber $rd, $rd = $merge"; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasMergeOp = 1; @@ -1241,8 +1175,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1258,8 +1190,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -1273,8 +1203,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1290,8 +1218,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -1307,8 +1233,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let HasDummyMask = 1; @@ -1325,8 +1249,6 @@ let mayLoad = 0; let mayStore = 1; let hasSideEffects = 0; - let usesCustomInserter = 1; - let Uses = [VL, VTYPE]; let HasVLOp = 1; let HasSEWOp = 1; let BaseInstr = !cast(PseudoToVInst.VInst); @@ -3521,7 +3443,7 @@ //===----------------------------------------------------------------------===// // 13.2. 
Vector Single-Width Averaging Add and Subtract //===----------------------------------------------------------------------===// -let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in { +let Uses = [VXRM], hasSideEffects = 1 in { defm PseudoVAADDU : VPseudoBinaryV_VV_VX; defm PseudoVAADD : VPseudoBinaryV_VV_VX; defm PseudoVASUBU : VPseudoBinaryV_VV_VX; @@ -3531,14 +3453,14 @@ //===----------------------------------------------------------------------===// // 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation //===----------------------------------------------------------------------===// -let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in { +let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in { defm PseudoVSMUL : VPseudoBinaryV_VV_VX; } //===----------------------------------------------------------------------===// // 13.4. Vector Single-Width Scaling Shift Instructions //===----------------------------------------------------------------------===// -let Uses = [VL, VTYPE, VXRM], hasSideEffects = 1 in { +let Uses = [VXRM], hasSideEffects = 1 in { defm PseudoVSSRL : VPseudoBinaryV_VV_VX_VI; defm PseudoVSSRA : VPseudoBinaryV_VV_VX_VI; } @@ -3546,7 +3468,7 @@ //===----------------------------------------------------------------------===// // 13.5. 
Vector Narrowing Fixed-Point Clip Instructions //===----------------------------------------------------------------------===// -let Uses = [VL, VTYPE, VXRM], Defs = [VXSAT], hasSideEffects = 1 in { +let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in { defm PseudoVNCLIP : VPseudoBinaryV_WV_WX_WI; defm PseudoVNCLIPU : VPseudoBinaryV_WV_WX_WI; } @@ -3792,8 +3714,7 @@ //===----------------------------------------------------------------------===// let Predicates = [HasStdExtV] in { -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1, - Uses = [VL, VTYPE] in { +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { foreach m = MxList.m in { let VLMul = m.value in { let HasSEWOp = 1, BaseInstr = VMV_X_S in @@ -3816,8 +3737,7 @@ //===----------------------------------------------------------------------===// let Predicates = [HasStdExtV, HasStdExtF] in { -let mayLoad = 0, mayStore = 0, hasSideEffects = 0, usesCustomInserter = 1, - Uses = [VL, VTYPE] in { +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { foreach m = MxList.m in { foreach f = FPList.fpinfo in { let VLMul = m.value in { diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -39,7 +39,7 @@ initializeGlobalISel(*PR); initializeRISCVMergeBaseOffsetOptPass(*PR); initializeRISCVExpandPseudoPass(*PR); - initializeRISCVCleanupVSETVLIPass(*PR); + initializeRISCVInsertVSETVLIPass(*PR); } static StringRef computeDataLayout(const Triple &TT) { @@ -191,8 +191,7 @@ } void RISCVPassConfig::addPreRegAlloc() { - if (TM->getOptLevel() != CodeGenOpt::None) { + if (TM->getOptLevel() != CodeGenOpt::None) addPass(createRISCVMergeBaseOffsetOptPass()); - addPass(createRISCVCleanupVSETVLIPass()); - } + addPass(createRISCVInsertVSETVLIPass()); } diff --git a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-gpr.mir 
b/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-gpr.mir deleted file mode 100644 --- a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-gpr.mir +++ /dev/null @@ -1,56 +0,0 @@ -# RUN: llc -mtriple riscv64 -mattr=+experimental-v %s \ -# RUN: -start-before=finalize-isel -stop-after=finalize-isel -o - \ -# RUN: | FileCheck --check-prefix=POST-INSERTER %s - -# RUN: llc -mtriple riscv64 -mattr=+experimental-v %s \ -# RUN: -start-before=finalize-isel -o - \ -# RUN: | FileCheck --check-prefix=CODEGEN %s - ---- | - define void @vadd_vint64m1( - *%pc, - *%pa, - *%pb, - i64 %vl) - { - ret void - } -... ---- -name: vadd_vint64m1 -tracksRegLiveness: true -body: | - bb.0 (%ir-block.0): - liveins: $x10, $x11, $x12, $x13 - - %3:gpr = COPY $x13 - %2:gpr = COPY $x12 - %1:gpr = COPY $x11 - %0:gpr = COPY $x10 - %4:vr = PseudoVLE64_V_M1 %1, %3, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) - %5:vr = PseudoVLE64_V_M1 %2, %3, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pb, align 8) - %6:vr = PseudoVADD_VV_M1 killed %4, killed %5, %3, 6, implicit $vl, implicit $vtype - PseudoVSE64_V_M1 killed %6, %0, %3, 6, implicit $vl, implicit $vtype :: (store unknown-size into %ir.pc, align 8) - PseudoRET - -... 
- -# POST-INSERTER: %0:gpr = COPY $x13 -# POST-INSERTER: %1:gpr = COPY $x12 -# POST-INSERTER: %2:gpr = COPY $x11 -# POST-INSERTER: %3:gpr = COPY $x10 -# POST-INSERTER: dead %7:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype -# POST-INSERTER: %4:vr = PseudoVLE64_V_M1 %2, $noreg, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) -# POST-INSERTER: dead %8:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype -# POST-INSERTER: %5:vr = PseudoVLE64_V_M1 %1, $noreg, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pb, align 8) -# POST-INSERTER: dead %9:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype -# POST-INSERTER: %6:vr = PseudoVADD_VV_M1 killed %4, killed %5, $noreg, 6, implicit $vl, implicit $vtype -# POST-INSERTER: dead %10:gpr = PseudoVSETVLI %0, 88, implicit-def $vl, implicit-def $vtype -# POST-INSERTER: PseudoVSE64_V_M1 killed %6, %3, $noreg, 6, implicit $vl, implicit $vtype :: (store unknown-size into %ir.pc, align 8) - -# CODEGEN: vsetvli a3, a3, e64,m1,ta,mu -# CODEGEN-NEXT: vle64.v v25, (a1) -# CODEGEN-NEXT: vle64.v v26, (a2) -# CODEGEN-NEXT: vadd.vv v25, v25, v26 -# CODEGEN-NEXT: vse64.v v25, (a0) -# CODEGEN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll b/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll deleted file mode 100644 --- a/llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll +++ /dev/null @@ -1,32 +0,0 @@ -; This test shows the evolution of RVV pseudo instructions within isel. 
- -; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o %t.pre.mir \ -; RUN: -stop-before=finalize-isel -; RUN: cat %t.pre.mir | FileCheck --check-prefix=PRE-INSERTER %s - -; RUN: llc -mtriple riscv64 -mattr=+experimental-v %t.pre.mir -o %t.post.mir \ -; RUN: -start-before=finalize-isel -stop-after=finalize-isel -; RUN: cat %t.post.mir | FileCheck --check-prefix=POST-INSERTER %s - -define void @vadd_vint64m1( - *%pc, - *%pa, - *%pb) -{ - %va = load , * %pa - %vb = load , * %pb - %vc = add %va, %vb - store %vc, *%pc - ret void -} - -; PRE-INSERTER: %3:vr = VL1RE64_V %1 :: (load unknown-size from %ir.pa, align 8) -; PRE-INSERTER: %4:vr = VL1RE64_V %2 :: (load unknown-size from %ir.pb, align 8) -; PRE-INSERTER: %5:vr = PseudoVADD_VV_M1 killed %3, killed %4, $x0, 6, implicit $vl, implicit $vtype -; PRE-INSERTER: VS1R_V killed %5, %0 :: (store unknown-size into %ir.pc, align 8) - -; POST-INSERTER: %3:vr = VL1RE64_V %1 :: (load unknown-size from %ir.pa, align 8) -; POST-INSERTER: %4:vr = VL1RE64_V %2 :: (load unknown-size from %ir.pb, align 8) -; POST-INSERTER: dead %6:gpr = PseudoVSETVLI $x0, 88, implicit-def $vl, implicit-def $vtype -; POST-INSERTER: %5:vr = PseudoVADD_VV_M1 killed %3, killed %4, $noreg, 6, implicit $vl, implicit $vtype -; POST-INSERTER: VS1R_V killed %5, %0 :: (store unknown-size into %ir.pc, align 8) diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir --- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir +++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=riscv64 -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s +# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s --- | define void @add_scalable_offset( @@ -55,7 +55,7 @@ ; CHECK: PseudoRET %1:gpr = COPY $x11 %0:gpr = COPY $x10 - %2:vr = PseudoVLE64_V_M1 
%0, %1, 6, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) + %2:vr = PseudoVLE64_V_M1 %0, %1, 6 :: (load unknown-size from %ir.pa, align 8) %3:gpr = ADDI %stack.2, 0 VS1R_V killed %2:vr, %3:gpr PseudoRET diff --git a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir deleted file mode 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetivli.mir +++ /dev/null @@ -1,46 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s - -# Make sure we don't combine these VSET{I}VLIs in the cleanup pass. We could not -# differentiate AVL values if the opcode of the previous one is different from -# current one. - ---- | - ; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetivli.ll' - source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetivli.ll" - target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" - target triple = "riscv64" - - define void @cleanup_vsetivli() #0 { - ret void - } - - attributes #0 = { "target-features"="+experimental-v" } - -... 
---- -name: cleanup_vsetivli -alignment: 4 -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } -frameInfo: - maxAlignment: 1 -machineFunctionInfo: {} -body: | - bb.0 (%ir-block.0): - ; CHECK-LABEL: name: cleanup_vsetivli - ; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: dead %1:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: dead %3:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: dead %5:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: PseudoRET - dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - dead %1:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype - dead %2:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype - dead %3:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - dead %4:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - dead %5:gpr = PseudoVSETIVLI 5, 12, implicit-def $vl, implicit-def $vtype - PseudoRET - -... diff --git a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir b/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir deleted file mode 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir +++ /dev/null @@ -1,79 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s - ---- | - ; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll' - source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll" - target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" - target triple = "riscv64" - - define void @cleanup_vsetvli0() #0 { - ret void - } - - define void @cleanup_vsetvli1() #0 { - ret void - } - - attributes #0 = { "target-features"="+experimental-v" } - -... ---- -# Make sure we don't combine these two VSETVLIs in the cleanup pass. 
The first -# keeps the previous value of VL, the second sets it to VLMAX. We can't remove -# the first since we can't tell if this is a change of VL. -name: cleanup_vsetvli0 -alignment: 4 -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } -frameInfo: - maxAlignment: 1 -machineFunctionInfo: {} -body: | - bb.0 (%ir-block.0): - ; CHECK-LABEL: name: cleanup_vsetvli0 - ; CHECK: dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: PseudoRET - dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - PseudoRET - -... ---- -# 1. Ensure we can remove the second VSETVLI which takes its AVL from the first VSETVLI. -# 2. Ensure we can remove the fourth VSETVLI which takes its AVL from the VSETIVLI. -# 3. Make sure we don't combine the latter two VSETVLIs; the first outputs to a -# physical register which is clobbered by a later instruction. 
-name: cleanup_vsetvli1 -alignment: 4 -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } -frameInfo: - maxAlignment: 1 -machineFunctionInfo: {} -body: | - bb.0 (%ir-block.0): - liveins: $x3 - ; CHECK-LABEL: name: cleanup_vsetvli1 - ; CHECK: liveins: $x3 - ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: [[PseudoVSETIVLI:%[0-9]+]]:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: $x1 = COPY $x3 - ; CHECK: dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype - ; CHECK: PseudoRET - %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - dead %1:gpr = PseudoVSETVLI %0, 12, implicit-def $vl, implicit-def $vtype - - %2:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype - dead %3:gpr = PseudoVSETVLI %2, 12, implicit-def $vl, implicit-def $vtype - - $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype - $x1 = COPY $x3 - dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype - PseudoRET - -... 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -3667,11 +3667,12 @@ ; LMULMAX2-RV32-NEXT: addi a3, a1, 819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a1, 4112 -; LMULMAX2-RV32-NEXT: addi a2, a1, 257 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX2-RV32-NEXT: lui a2, 4112 +; LMULMAX2-RV32-NEXT: addi a2, a2, 257 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX2-RV32-NEXT: # %bb.1: +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 @@ -3726,12 +3727,13 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_5 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -3756,8 +3758,8 @@ ; LMULMAX2-RV32-NEXT: addi a1, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB3_6 ; LMULMAX2-RV32-NEXT: .LBB3_5: -; LMULMAX2-RV32-NEXT: srli a5, a1, 1 -; LMULMAX2-RV32-NEXT: or a1, a1, a5 +; LMULMAX2-RV32-NEXT: srli a1, a5, 1 +; LMULMAX2-RV32-NEXT: or a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -3900,11 +3902,12 @@ ; 
LMULMAX1-RV32-NEXT: addi a3, a1, 819 ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi a7, a1, -241 -; LMULMAX1-RV32-NEXT: lui a1, 4112 -; LMULMAX1-RV32-NEXT: addi a2, a1, 257 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: lui a2, 4112 +; LMULMAX1-RV32-NEXT: addi a2, a2, 257 ; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX1-RV32-NEXT: # %bb.1: +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 2 @@ -3959,12 +3962,13 @@ ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25 -; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_5 +; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26 +; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: srli a1, a5, 1 -; LMULMAX1-RV32-NEXT: or a1, a5, a1 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: srli a5, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 4 @@ -3989,8 +3993,8 @@ ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB3_6 ; LMULMAX1-RV32-NEXT: .LBB3_5: -; LMULMAX1-RV32-NEXT: srli a5, a1, 1 -; LMULMAX1-RV32-NEXT: or a1, a1, a5 +; LMULMAX1-RV32-NEXT: srli a1, a5, 1 +; LMULMAX1-RV32-NEXT: or a1, a5, a1 ; LMULMAX1-RV32-NEXT: srli a5, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 4 @@ -11120,11 +11124,12 @@ ; LMULMAX2-RV32-NEXT: addi a3, a1, 819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a1, 4112 -; LMULMAX2-RV32-NEXT: addi a2, a1, 257 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: lui a2, 
4112 +; LMULMAX2-RV32-NEXT: addi a2, a2, 257 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2 ; LMULMAX2-RV32-NEXT: # %bb.1: +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 @@ -11179,12 +11184,13 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 3 ; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_5 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11209,8 +11215,8 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_6 ; LMULMAX2-RV32-NEXT: .LBB7_5: -; LMULMAX2-RV32-NEXT: srli a5, a1, 1 -; LMULMAX2-RV32-NEXT: or a1, a1, a5 +; LMULMAX2-RV32-NEXT: srli a1, a5, 1 +; LMULMAX2-RV32-NEXT: or a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11237,12 +11243,13 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2 ; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_8 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8 ; LMULMAX2-RV32-NEXT: # %bb.7: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; 
LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11267,8 +11274,8 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_9 ; LMULMAX2-RV32-NEXT: .LBB7_8: -; LMULMAX2-RV32-NEXT: srli a5, a1, 1 -; LMULMAX2-RV32-NEXT: or a1, a1, a5 +; LMULMAX2-RV32-NEXT: srli a1, a5, 1 +; LMULMAX2-RV32-NEXT: or a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11295,12 +11302,13 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1 ; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_11 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11 ; LMULMAX2-RV32-NEXT: # %bb.10: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11325,8 +11333,8 @@ ; LMULMAX2-RV32-NEXT: addi a1, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_12 ; LMULMAX2-RV32-NEXT: .LBB7_11: -; LMULMAX2-RV32-NEXT: srli a5, a1, 1 -; LMULMAX2-RV32-NEXT: or a1, a1, a5 +; LMULMAX2-RV32-NEXT: srli a1, a5, 1 +; LMULMAX2-RV32-NEXT: or a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11544,13 +11552,14 @@ ; LMULMAX1-RV32-NEXT: addi a4, a2, 819 ; LMULMAX1-RV32-NEXT: lui a2, 61681 ; LMULMAX1-RV32-NEXT: addi t0, a2, -241 -; LMULMAX1-RV32-NEXT: lui a2, 4112 -; LMULMAX1-RV32-NEXT: addi a3, a2, 257 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, 
v26 +; LMULMAX1-RV32-NEXT: lui a3, 4112 +; LMULMAX1-RV32-NEXT: addi a3, a3, 257 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2 ; LMULMAX1-RV32-NEXT: # %bb.1: -; LMULMAX1-RV32-NEXT: srli a1, a2, 1 -; LMULMAX1-RV32-NEXT: or a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: srli a2, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 @@ -11604,11 +11613,12 @@ ; LMULMAX1-RV32-NEXT: vslidedown.vi v26, v26, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: srli a1, a2, 1 -; LMULMAX1-RV32-NEXT: or a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: srli a2, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 @@ -11663,11 +11673,12 @@ ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX1-RV32-NEXT: # %bb.7: -; LMULMAX1-RV32-NEXT: srli a1, a2, 1 -; LMULMAX1-RV32-NEXT: or a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: srli a2, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 @@ -11721,11 +11732,12 @@ ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11 ; 
LMULMAX1-RV32-NEXT: # %bb.10: -; LMULMAX1-RV32-NEXT: srli a1, a2, 1 -; LMULMAX1-RV32-NEXT: or a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: srli a2, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -2538,9 +2538,6 @@ ; LMULMAX2-RV32-NEXT: sw zero, 12(sp) ; LMULMAX2-RV32-NEXT: sw zero, 4(sp) ; LMULMAX2-RV32-NEXT: addi a6, zero, 32 -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a4, a1, 1365 ; LMULMAX2-RV32-NEXT: lui a1, 209715 @@ -2548,13 +2545,16 @@ ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 ; LMULMAX2-RV32-NEXT: lui a2, 4112 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25 ; LMULMAX2-RV32-NEXT: addi a2, a2, 257 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_2 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX2-RV32-NEXT: # %bb.1: -; LMULMAX2-RV32-NEXT: addi a1, a5, -1 -; LMULMAX2-RV32-NEXT: not a5, a5 -; LMULMAX2-RV32-NEXT: and a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: addi a5, a1, -1 +; LMULMAX2-RV32-NEXT: not a1, a1 +; LMULMAX2-RV32-NEXT: and a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a5, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5 @@ -2570,9 +2570,9 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB3_3 ; LMULMAX2-RV32-NEXT: .LBB3_2: -; LMULMAX2-RV32-NEXT: addi a5, a1, -1 -; 
LMULMAX2-RV32-NEXT: not a1, a1 -; LMULMAX2-RV32-NEXT: and a1, a1, a5 +; LMULMAX2-RV32-NEXT: addi a1, a5, -1 +; LMULMAX2-RV32-NEXT: not a5, a5 +; LMULMAX2-RV32-NEXT: and a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a5, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5 @@ -2590,10 +2590,11 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25 -; LMULMAX2-RV32-NEXT: vsrl.vx v25, v25, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX2-RV32-NEXT: # %bb.4: +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v25, v25, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 ; LMULMAX2-RV32-NEXT: not a1, a1 ; LMULMAX2-RV32-NEXT: and a1, a1, a5 @@ -2719,9 +2720,6 @@ ; LMULMAX1-RV32-NEXT: sw zero, 12(sp) ; LMULMAX1-RV32-NEXT: sw zero, 4(sp) ; LMULMAX1-RV32-NEXT: addi a6, zero, 32 -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26 ; LMULMAX1-RV32-NEXT: lui a1, 349525 ; LMULMAX1-RV32-NEXT: addi a4, a1, 1365 ; LMULMAX1-RV32-NEXT: lui a1, 209715 @@ -2729,13 +2727,16 @@ ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi a7, a1, -241 ; LMULMAX1-RV32-NEXT: lui a2, 4112 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25 ; LMULMAX1-RV32-NEXT: addi a2, a2, 257 -; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_2 +; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX1-RV32-NEXT: # %bb.1: -; LMULMAX1-RV32-NEXT: addi a1, a5, -1 -; LMULMAX1-RV32-NEXT: not a5, a5 -; LMULMAX1-RV32-NEXT: and a1, a5, a1 +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: addi a5, a1, -1 +; LMULMAX1-RV32-NEXT: not a1, a1 +; LMULMAX1-RV32-NEXT: and a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1 ; 
LMULMAX1-RV32-NEXT: and a5, a5, a4 ; LMULMAX1-RV32-NEXT: sub a1, a1, a5 @@ -2751,9 +2752,9 @@ ; LMULMAX1-RV32-NEXT: addi a5, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB3_3 ; LMULMAX1-RV32-NEXT: .LBB3_2: -; LMULMAX1-RV32-NEXT: addi a5, a1, -1 -; LMULMAX1-RV32-NEXT: not a1, a1 -; LMULMAX1-RV32-NEXT: and a1, a1, a5 +; LMULMAX1-RV32-NEXT: addi a1, a5, -1 +; LMULMAX1-RV32-NEXT: not a5, a5 +; LMULMAX1-RV32-NEXT: and a1, a5, a1 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1 ; LMULMAX1-RV32-NEXT: and a5, a5, a4 ; LMULMAX1-RV32-NEXT: sub a1, a1, a5 @@ -2771,10 +2772,11 @@ ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25 -; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX1-RV32-NEXT: # %bb.4: +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: addi a5, a1, -1 ; LMULMAX1-RV32-NEXT: not a1, a1 ; LMULMAX1-RV32-NEXT: and a1, a1, a5 @@ -7647,9 +7649,6 @@ ; LMULMAX2-RV32-NEXT: sw zero, 12(sp) ; LMULMAX2-RV32-NEXT: sw zero, 4(sp) ; LMULMAX2-RV32-NEXT: addi a6, zero, 32 -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu -; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a4, a1, 1365 ; LMULMAX2-RV32-NEXT: lui a1, 209715 @@ -7657,13 +7656,16 @@ ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 ; LMULMAX2-RV32-NEXT: lui a2, 4112 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 ; LMULMAX2-RV32-NEXT: addi a2, a2, 257 -; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_2 +; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2 ; LMULMAX2-RV32-NEXT: # %bb.1: -; LMULMAX2-RV32-NEXT: addi a1, a5, -1 -; LMULMAX2-RV32-NEXT: not a5, a5 -; LMULMAX2-RV32-NEXT: and a1, a5, a1 +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; 
LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 +; LMULMAX2-RV32-NEXT: addi a5, a1, -1 +; LMULMAX2-RV32-NEXT: not a1, a1 +; LMULMAX2-RV32-NEXT: and a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a5, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5 @@ -7679,9 +7681,9 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_3 ; LMULMAX2-RV32-NEXT: .LBB7_2: -; LMULMAX2-RV32-NEXT: addi a5, a1, -1 -; LMULMAX2-RV32-NEXT: not a1, a1 -; LMULMAX2-RV32-NEXT: and a1, a1, a5 +; LMULMAX2-RV32-NEXT: addi a1, a5, -1 +; LMULMAX2-RV32-NEXT: not a5, a5 +; LMULMAX2-RV32-NEXT: and a1, a5, a1 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a5, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5 @@ -7699,10 +7701,11 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 3 ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5 ; LMULMAX2-RV32-NEXT: # %bb.4: +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 ; LMULMAX2-RV32-NEXT: not a1, a1 ; LMULMAX2-RV32-NEXT: and a1, a1, a5 @@ -7741,10 +7744,11 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2 ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8 ; LMULMAX2-RV32-NEXT: # %bb.7: +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 ; LMULMAX2-RV32-NEXT: not a1, a1 ; LMULMAX2-RV32-NEXT: and a1, a1, a5 @@ -7783,10 +7787,11 @@ ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1 ; 
LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX2-RV32-NEXT: vsrl.vx v26, v26, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11 ; LMULMAX2-RV32-NEXT: # %bb.10: +; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; LMULMAX2-RV32-NEXT: vsrl.vx v26, v26, a6 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 ; LMULMAX2-RV32-NEXT: not a1, a1 ; LMULMAX2-RV32-NEXT: and a1, a1, a5 @@ -7962,25 +7967,25 @@ ; LMULMAX1-RV32-NEXT: .cfi_def_cfa_offset 32 ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vle64.v v25, (a0) -; LMULMAX1-RV32-NEXT: addi a6, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v26, (a6) +; LMULMAX1-RV32-NEXT: addi a7, a0, 16 +; LMULMAX1-RV32-NEXT: vle64.v v26, (a7) ; LMULMAX1-RV32-NEXT: sw zero, 28(sp) ; LMULMAX1-RV32-NEXT: sw zero, 20(sp) -; LMULMAX1-RV32-NEXT: addi a7, zero, 32 -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27 -; LMULMAX1-RV32-NEXT: lui a2, 349525 -; LMULMAX1-RV32-NEXT: addi a5, a2, 1365 -; LMULMAX1-RV32-NEXT: lui a2, 209715 -; LMULMAX1-RV32-NEXT: addi a4, a2, 819 -; LMULMAX1-RV32-NEXT: lui a2, 61681 -; LMULMAX1-RV32-NEXT: addi t0, a2, -241 +; LMULMAX1-RV32-NEXT: addi a6, zero, 32 +; LMULMAX1-RV32-NEXT: lui a1, 349525 +; LMULMAX1-RV32-NEXT: addi a5, a1, 1365 +; LMULMAX1-RV32-NEXT: lui a1, 209715 +; LMULMAX1-RV32-NEXT: addi a4, a1, 819 +; LMULMAX1-RV32-NEXT: lui a1, 61681 +; LMULMAX1-RV32-NEXT: addi t0, a1, -241 ; LMULMAX1-RV32-NEXT: lui a3, 4112 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: addi a3, a3, 257 -; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_2 +; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2 ; LMULMAX1-RV32-NEXT: # %bb.1: +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27 ; LMULMAX1-RV32-NEXT: addi a2, a1, -1 ; LMULMAX1-RV32-NEXT: not a1, a1 ; 
LMULMAX1-RV32-NEXT: and a1, a1, a2 @@ -7999,9 +8004,9 @@ ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB7_3 ; LMULMAX1-RV32-NEXT: .LBB7_2: -; LMULMAX1-RV32-NEXT: addi a1, a2, -1 -; LMULMAX1-RV32-NEXT: not a2, a2 -; LMULMAX1-RV32-NEXT: and a1, a2, a1 +; LMULMAX1-RV32-NEXT: addi a2, a1, -1 +; LMULMAX1-RV32-NEXT: not a1, a1 +; LMULMAX1-RV32-NEXT: and a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: and a2, a2, a5 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2 @@ -8019,13 +8024,14 @@ ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v26, v26, 1 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: vsrl.vx v26, v26, a7 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: addi a1, a2, -1 -; LMULMAX1-RV32-NEXT: not a2, a2 -; LMULMAX1-RV32-NEXT: and a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v26, v26, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: addi a2, a1, -1 +; LMULMAX1-RV32-NEXT: not a1, a1 +; LMULMAX1-RV32-NEXT: and a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: and a2, a2, a5 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2 @@ -8060,15 +8066,16 @@ ; LMULMAX1-RV32-NEXT: sw a1, 24(sp) ; LMULMAX1-RV32-NEXT: sw zero, 12(sp) ; LMULMAX1-RV32-NEXT: sw zero, 4(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu -; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX1-RV32-NEXT: # %bb.7: -; LMULMAX1-RV32-NEXT: addi a1, a2, -1 -; LMULMAX1-RV32-NEXT: not a2, a2 -; LMULMAX1-RV32-NEXT: and a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: addi a2, a1, -1 +; 
LMULMAX1-RV32-NEXT: not a1, a1 +; LMULMAX1-RV32-NEXT: and a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: and a2, a2, a5 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2 @@ -8104,13 +8111,14 @@ ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 -; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a7 -; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11 ; LMULMAX1-RV32-NEXT: # %bb.10: -; LMULMAX1-RV32-NEXT: addi a1, a2, -1 -; LMULMAX1-RV32-NEXT: not a2, a2 -; LMULMAX1-RV32-NEXT: and a1, a2, a1 +; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu +; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: addi a2, a1, -1 +; LMULMAX1-RV32-NEXT: not a1, a1 +; LMULMAX1-RV32-NEXT: and a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: and a2, a2, a5 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2 @@ -8149,7 +8157,7 @@ ; LMULMAX1-RV32-NEXT: vle32.v v26, (a1) ; LMULMAX1-RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vse64.v v25, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v26, (a6) +; LMULMAX1-RV32-NEXT: vse64.v v26, (a7) ; LMULMAX1-RV32-NEXT: addi sp, sp, 32 ; LMULMAX1-RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll @@ -7,20 +7,22 @@ define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) { ; CHECK-LABEL: select_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft3, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft2, v25 ; CHECK-NEXT: bnez a0, .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 
-; CHECK-NEXT: fmv.h ft2, ft3 +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: vsetivli a0, 1, e16,mf4,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB0_3 ; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vsetivli a0, 1, e16,mf4,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: vfmv.f.s ft1, v25 ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu -; CHECK-NEXT: vfmv.v.f v8, ft2 +; CHECK-NEXT: vfmv.v.f v8, ft1 ; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: ret %v = select i1 %c, <2 x half> %a, <2 x half> %b @@ -31,23 +33,27 @@ ; CHECK-LABEL: selectcc_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: feq.h a0, fa0, fa1 +; CHECK-NEXT: bnez a0, .LBB1_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vsetivli a1, 1, e16,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: j .LBB1_3 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: vsetivli a1, 1, e16,mf4,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB1_3: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: bnez a0, .LBB1_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 -; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: vsetivli a1, 2, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.v.f v25, ft0 -; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: bnez a0, .LBB1_5 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: j .LBB1_6 +; CHECK-NEXT: .LBB1_5: +; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB1_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.h ft0, ft1 -; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: .LBB1_6: ; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -62,44 +68,49 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: vsetvli zero, zero, 
e16,mf2,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB2_2 +; CHECK-NEXT: bnez a0, .LBB2_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: beqz a0, .LBB2_4 ; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: j .LBB2_5 +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: bnez a0, .LBB2_2 +; CHECK-NEXT: .LBB2_4: ; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: .LBB2_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB2_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB2_4: -; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: fsh ft0, 14(sp) +; CHECK-NEXT: bnez a0, .LBB2_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB2_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB2_6: -; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: j .LBB2_8 +; CHECK-NEXT: .LBB2_7: ; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB2_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB2_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB2_11 +; CHECK-NEXT: .LBB2_10: +; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, 
v25 -; CHECK-NEXT: bnez a0, .LBB2_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB2_8: -; CHECK-NEXT: fsh ft1, 10(sp) +; CHECK-NEXT: .LBB2_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 10(sp) ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vle16.v v8, (a0) @@ -115,44 +126,49 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: feq.h a0, fa0, fa1 -; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB3_2 +; CHECK-NEXT: bnez a0, .LBB3_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: beqz a0, .LBB3_4 ; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: j .LBB3_5 +; CHECK-NEXT: .LBB3_3: +; CHECK-NEXT: vsetvli zero, zero, e16,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: bnez a0, .LBB3_2 +; CHECK-NEXT: .LBB3_4: ; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: .LBB3_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB3_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB3_4: -; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: fsh ft0, 14(sp) +; CHECK-NEXT: bnez a0, .LBB3_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB3_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB3_6: -; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: j .LBB3_8 +; CHECK-NEXT: .LBB3_7: ; CHECK-NEXT: vsetivli 
a1, 1, e16,mf2,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB3_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB3_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB3_11 +; CHECK-NEXT: .LBB3_10: +; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB3_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB3_8: -; CHECK-NEXT: fsh ft1, 10(sp) +; CHECK-NEXT: .LBB3_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 10(sp) ; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu ; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vle16.v v8, (a0) @@ -168,84 +184,93 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB4_2 +; CHECK-NEXT: bnez a0, .LBB4_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsh ft0, 0(sp) +; CHECK-NEXT: beqz a0, .LBB4_4 ; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 7 +; CHECK-NEXT: j .LBB4_5 +; CHECK-NEXT: .LBB4_3: +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsh ft0, 0(sp) +; CHECK-NEXT: bnez a0, .LBB4_2 +; CHECK-NEXT: .LBB4_4: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 7 +; CHECK-NEXT: .LBB4_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 7 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: 
fsh ft0, 14(sp) +; CHECK-NEXT: bnez a0, .LBB4_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 6 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB4_8 +; CHECK-NEXT: .LBB4_7: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 6 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_6: -; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: .LBB4_8: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB4_10 +; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 5 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB4_11 +; CHECK-NEXT: .LBB4_10: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 5 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_8: -; CHECK-NEXT: fsh ft1, 10(sp) +; CHECK-NEXT: .LBB4_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 10(sp) +; CHECK-NEXT: bnez a0, .LBB4_13 +; CHECK-NEXT: # %bb.12: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 4 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB4_14 +; CHECK-NEXT: .LBB4_13: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 4 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_10: -; CHECK-NEXT: fsh ft1, 8(sp) +; CHECK-NEXT: .LBB4_14: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: bnez a0, .LBB4_16 +; CHECK-NEXT: # %bb.15: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB4_17 +; CHECK-NEXT: .LBB4_16: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; 
CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_12: -; CHECK-NEXT: fsh ft1, 6(sp) +; CHECK-NEXT: .LBB4_17: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 6(sp) +; CHECK-NEXT: bnez a0, .LBB4_19 +; CHECK-NEXT: # %bb.18: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_14 -; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_14: -; CHECK-NEXT: fsh ft1, 4(sp) +; CHECK-NEXT: j .LBB4_20 +; CHECK-NEXT: .LBB4_19: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB4_20: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 4(sp) +; CHECK-NEXT: bnez a0, .LBB4_22 +; CHECK-NEXT: # %bb.21: +; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB4_23 +; CHECK-NEXT: .LBB4_22: +; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB4_16 -; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB4_16: -; CHECK-NEXT: fsh ft1, 2(sp) +; CHECK-NEXT: .LBB4_23: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 2(sp) ; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu ; CHECK-NEXT: vle16.v v8, (sp) ; CHECK-NEXT: addi sp, sp, 16 @@ -260,84 +285,93 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: feq.h a0, fa0, fa1 -; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB5_2 +; CHECK-NEXT: bnez a0, .LBB5_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.h ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; 
CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsh ft0, 0(sp) +; CHECK-NEXT: beqz a0, .LBB5_4 ; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 7 +; CHECK-NEXT: j .LBB5_5 +; CHECK-NEXT: .LBB5_3: +; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsh ft0, 0(sp) +; CHECK-NEXT: bnez a0, .LBB5_2 +; CHECK-NEXT: .LBB5_4: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 7 +; CHECK-NEXT: .LBB5_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 7 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_4: -; CHECK-NEXT: fsh ft1, 14(sp) +; CHECK-NEXT: fsh ft0, 14(sp) +; CHECK-NEXT: bnez a0, .LBB5_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 6 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB5_8 +; CHECK-NEXT: .LBB5_7: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 6 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_6: -; CHECK-NEXT: fsh ft1, 12(sp) +; CHECK-NEXT: .LBB5_8: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB5_10 +; CHECK-NEXT: # %bb.9: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 5 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB5_11 +; CHECK-NEXT: .LBB5_10: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 5 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_8: -; CHECK-NEXT: fsh ft1, 10(sp) +; CHECK-NEXT: .LBB5_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 10(sp) +; CHECK-NEXT: bnez a0, .LBB5_13 +; CHECK-NEXT: # %bb.12: ; 
CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 4 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB5_14 +; CHECK-NEXT: .LBB5_13: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 4 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_10: -; CHECK-NEXT: fsh ft1, 8(sp) +; CHECK-NEXT: .LBB5_14: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 8(sp) +; CHECK-NEXT: bnez a0, .LBB5_16 +; CHECK-NEXT: # %bb.15: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 -; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: j .LBB5_17 +; CHECK-NEXT: .LBB5_16: +; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_12: -; CHECK-NEXT: fsh ft1, 6(sp) +; CHECK-NEXT: .LBB5_17: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 6(sp) +; CHECK-NEXT: bnez a0, .LBB5_19 +; CHECK-NEXT: # %bb.18: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_14 -; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_14: -; CHECK-NEXT: fsh ft1, 4(sp) +; CHECK-NEXT: j .LBB5_20 +; CHECK-NEXT: .LBB5_19: ; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB5_20: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 4(sp) +; CHECK-NEXT: bnez a0, .LBB5_22 +; CHECK-NEXT: # %bb.21: +; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB5_23 +; CHECK-NEXT: .LBB5_22: +; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi 
v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB5_16 -; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: fmv.h ft1, ft0 -; CHECK-NEXT: .LBB5_16: -; CHECK-NEXT: fsh ft1, 2(sp) +; CHECK-NEXT: .LBB5_23: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsh ft0, 2(sp) ; CHECK-NEXT: vsetivli a0, 8, e16,m1,ta,mu ; CHECK-NEXT: vle16.v v8, (sp) ; CHECK-NEXT: addi sp, sp, 16 @@ -359,165 +393,182 @@ ; RV32-NEXT: addi s0, sp, 64 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB6_2 +; RV32-NEXT: bnez a0, .LBB6_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.h ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsh ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB6_4 ; RV32-NEXT: .LBB6_2: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 15 +; RV32-NEXT: j .LBB6_5 +; RV32-NEXT: .LBB6_3: +; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsh ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB6_2 +; RV32-NEXT: .LBB6_4: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 15 +; RV32-NEXT: .LBB6_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_4: -; RV32-NEXT: fsh ft1, 30(sp) +; RV32-NEXT: fsh ft0, 30(sp) +; RV32-NEXT: bnez a0, .LBB6_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 14 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_6: -; RV32-NEXT: fsh ft1, 28(sp) +; RV32-NEXT: j .LBB6_8 +; RV32-NEXT: .LBB6_7: ; RV32-NEXT: vsetivli a1, 1, 
e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 13 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vslidedown.vi v26, v8, 14 ; RV32-NEXT: .LBB6_8: -; RV32-NEXT: fsh ft1, 26(sp) -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 12 ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 12 -; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: fsh ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB6_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 13 +; RV32-NEXT: j .LBB6_11 ; RV32-NEXT: .LBB6_10: -; RV32-NEXT: fsh ft1, 24(sp) ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 11 +; RV32-NEXT: vslidedown.vi v26, v8, 13 +; RV32-NEXT: .LBB6_11: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_12: -; RV32-NEXT: fsh ft1, 22(sp) +; RV32-NEXT: fsh ft0, 26(sp) +; RV32-NEXT: bnez a0, .LBB6_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 10 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_14: -; RV32-NEXT: fsh ft1, 20(sp) +; RV32-NEXT: vslidedown.vi v26, v10, 12 +; RV32-NEXT: j .LBB6_14 +; RV32-NEXT: .LBB6_13: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 9 +; RV32-NEXT: vslidedown.vi v26, v8, 12 +; RV32-NEXT: .LBB6_14: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: fsh ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB6_16 ; 
RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 11 +; RV32-NEXT: j .LBB6_17 ; RV32-NEXT: .LBB6_16: -; RV32-NEXT: fsh ft1, 18(sp) ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 8 +; RV32-NEXT: vslidedown.vi v26, v8, 11 +; RV32-NEXT: .LBB6_17: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_18: -; RV32-NEXT: fsh ft1, 16(sp) +; RV32-NEXT: fsh ft0, 22(sp) +; RV32-NEXT: bnez a0, .LBB6_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 7 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_20: -; RV32-NEXT: fsh ft1, 14(sp) +; RV32-NEXT: vslidedown.vi v26, v10, 10 +; RV32-NEXT: j .LBB6_20 +; RV32-NEXT: .LBB6_19: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 6 +; RV32-NEXT: vslidedown.vi v26, v8, 10 +; RV32-NEXT: .LBB6_20: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: fsh ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB6_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 9 +; RV32-NEXT: j .LBB6_23 ; RV32-NEXT: .LBB6_22: -; RV32-NEXT: fsh ft1, 12(sp) ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 5 +; RV32-NEXT: vslidedown.vi v26, v8, 9 +; RV32-NEXT: .LBB6_23: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_24: -; RV32-NEXT: fsh ft1, 
10(sp) +; RV32-NEXT: fsh ft0, 18(sp) +; RV32-NEXT: bnez a0, .LBB6_25 +; RV32-NEXT: # %bb.24: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 4 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_26: -; RV32-NEXT: fsh ft1, 8(sp) +; RV32-NEXT: vslidedown.vi v26, v10, 8 +; RV32-NEXT: j .LBB6_26 +; RV32-NEXT: .LBB6_25: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: vslidedown.vi v26, v8, 8 +; RV32-NEXT: .LBB6_26: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 +; RV32-NEXT: fsh ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB6_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 7 +; RV32-NEXT: j .LBB6_29 ; RV32-NEXT: .LBB6_28: -; RV32-NEXT: fsh ft1, 6(sp) ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 2 +; RV32-NEXT: vslidedown.vi v26, v8, 7 +; RV32-NEXT: .LBB6_29: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_30 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB6_30: -; RV32-NEXT: fsh ft1, 4(sp) +; RV32-NEXT: fsh ft0, 14(sp) +; RV32-NEXT: bnez a0, .LBB6_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB6_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.h ft1, ft0 +; RV32-NEXT: vslidedown.vi v26, v10, 6 +; RV32-NEXT: j .LBB6_32 +; RV32-NEXT: .LBB6_31: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 6 ; RV32-NEXT: .LBB6_32: -; RV32-NEXT: fsh ft1, 2(sp) -; 
RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB6_34 +; RV32-NEXT: # %bb.33: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 5 +; RV32-NEXT: j .LBB6_35 +; RV32-NEXT: .LBB6_34: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 5 +; RV32-NEXT: .LBB6_35: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 10(sp) +; RV32-NEXT: bnez a0, .LBB6_37 +; RV32-NEXT: # %bb.36: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 4 +; RV32-NEXT: j .LBB6_38 +; RV32-NEXT: .LBB6_37: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 4 +; RV32-NEXT: .LBB6_38: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB6_40 +; RV32-NEXT: # %bb.39: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: j .LBB6_41 +; RV32-NEXT: .LBB6_40: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: .LBB6_41: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 6(sp) +; RV32-NEXT: bnez a0, .LBB6_43 +; RV32-NEXT: # %bb.42: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 2 +; RV32-NEXT: j .LBB6_44 +; RV32-NEXT: .LBB6_43: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB6_44: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 4(sp) +; RV32-NEXT: bnez a0, .LBB6_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB6_47 +; RV32-NEXT: .LBB6_46: +; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 1 +; RV32-NEXT: .LBB6_47: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 2(sp) +; RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; RV32-NEXT: vle16.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 ; RV32-NEXT: lw 
s0, 56(sp) # 4-byte Folded Reload @@ -536,164 +587,181 @@ ; RV64-NEXT: addi s0, sp, 64 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB6_2 +; RV64-NEXT: bnez a0, .LBB6_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.h ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsh ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB6_4 ; RV64-NEXT: .LBB6_2: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 15 +; RV64-NEXT: j .LBB6_5 +; RV64-NEXT: .LBB6_3: +; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsh ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB6_2 +; RV64-NEXT: .LBB6_4: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 15 +; RV64-NEXT: .LBB6_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_4: -; RV64-NEXT: fsh ft1, 30(sp) +; RV64-NEXT: fsh ft0, 30(sp) +; RV64-NEXT: bnez a0, .LBB6_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 14 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_8 +; RV64-NEXT: .LBB6_7: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_6: -; RV64-NEXT: fsh ft1, 28(sp) +; RV64-NEXT: .LBB6_8: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 28(sp) +; RV64-NEXT: bnez a0, .LBB6_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 13 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_11 +; RV64-NEXT: .LBB6_10: +; RV64-NEXT: vsetivli a1, 1, 
e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 13 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_8: -; RV64-NEXT: fsh ft1, 26(sp) +; RV64-NEXT: .LBB6_11: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 26(sp) +; RV64-NEXT: bnez a0, .LBB6_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 12 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_14 +; RV64-NEXT: .LBB6_13: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 12 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_10: -; RV64-NEXT: fsh ft1, 24(sp) +; RV64-NEXT: .LBB6_14: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB6_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 11 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_17 +; RV64-NEXT: .LBB6_16: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_12: -; RV64-NEXT: fsh ft1, 22(sp) +; RV64-NEXT: .LBB6_17: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 22(sp) +; RV64-NEXT: bnez a0, .LBB6_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 10 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_20 +; RV64-NEXT: .LBB6_19: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_14: -; RV64-NEXT: fsh ft1, 20(sp) +; RV64-NEXT: .LBB6_20: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 20(sp) +; RV64-NEXT: bnez 
a0, .LBB6_22 +; RV64-NEXT: # %bb.21: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 9 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_23 +; RV64-NEXT: .LBB6_22: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_16: -; RV64-NEXT: fsh ft1, 18(sp) +; RV64-NEXT: .LBB6_23: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 18(sp) +; RV64-NEXT: bnez a0, .LBB6_25 +; RV64-NEXT: # %bb.24: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 8 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_26 +; RV64-NEXT: .LBB6_25: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_18: -; RV64-NEXT: fsh ft1, 16(sp) +; RV64-NEXT: .LBB6_26: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB6_28 +; RV64-NEXT: # %bb.27: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 7 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_29 +; RV64-NEXT: .LBB6_28: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_20: -; RV64-NEXT: fsh ft1, 14(sp) +; RV64-NEXT: .LBB6_29: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 14(sp) +; RV64-NEXT: bnez a0, .LBB6_31 +; RV64-NEXT: # %bb.30: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_32 +; RV64-NEXT: .LBB6_31: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez 
a0, .LBB6_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_22: -; RV64-NEXT: fsh ft1, 12(sp) +; RV64-NEXT: .LBB6_32: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB6_34 +; RV64-NEXT: # %bb.33: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_35 +; RV64-NEXT: .LBB6_34: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_24 -; RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_24: -; RV64-NEXT: fsh ft1, 10(sp) +; RV64-NEXT: .LBB6_35: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 10(sp) +; RV64-NEXT: bnez a0, .LBB6_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_38 +; RV64-NEXT: .LBB6_37: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_26: -; RV64-NEXT: fsh ft1, 8(sp) +; RV64-NEXT: .LBB6_38: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 8(sp) +; RV64-NEXT: bnez a0, .LBB6_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB6_41 +; RV64-NEXT: .LBB6_40: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_28: -; RV64-NEXT: fsh ft1, 6(sp) +; RV64-NEXT: .LBB6_41: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 6(sp) +; RV64-NEXT: bnez a0, .LBB6_43 +; RV64-NEXT: # %bb.42: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, 
v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_30: -; RV64-NEXT: fsh ft1, 4(sp) +; RV64-NEXT: j .LBB6_44 +; RV64-NEXT: .LBB6_43: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB6_44: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 4(sp) +; RV64-NEXT: bnez a0, .LBB6_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB6_47 +; RV64-NEXT: .LBB6_46: +; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB6_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB6_32: -; RV64-NEXT: fsh ft1, 2(sp) +; RV64-NEXT: .LBB6_47: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 2(sp) ; RV64-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; RV64-NEXT: vle16.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -718,164 +786,181 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 ; RV32-NEXT: feq.h a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB7_2 +; RV32-NEXT: bnez a0, .LBB7_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.h ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsh ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB7_4 ; RV32-NEXT: .LBB7_2: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 15 +; RV32-NEXT: j .LBB7_5 +; RV32-NEXT: .LBB7_3: +; RV32-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsh ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB7_2 +; RV32-NEXT: .LBB7_4: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 
15 +; RV32-NEXT: .LBB7_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_4: -; RV32-NEXT: fsh ft1, 30(sp) +; RV32-NEXT: fsh ft0, 30(sp) +; RV32-NEXT: bnez a0, .LBB7_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 14 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_8 +; RV32-NEXT: .LBB7_7: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_6: -; RV32-NEXT: fsh ft1, 28(sp) +; RV32-NEXT: .LBB7_8: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 28(sp) +; RV32-NEXT: bnez a0, .LBB7_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 13 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_11 +; RV32-NEXT: .LBB7_10: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_8: -; RV32-NEXT: fsh ft1, 26(sp) +; RV32-NEXT: .LBB7_11: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 26(sp) +; RV32-NEXT: bnez a0, .LBB7_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 12 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_14 +; RV32-NEXT: .LBB7_13: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 12 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_10: -; RV32-NEXT: fsh ft1, 24(sp) +; RV32-NEXT: .LBB7_14: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB7_16 +; 
RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 11 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_17 +; RV32-NEXT: .LBB7_16: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_12: -; RV32-NEXT: fsh ft1, 22(sp) +; RV32-NEXT: .LBB7_17: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 22(sp) +; RV32-NEXT: bnez a0, .LBB7_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 10 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_20 +; RV32-NEXT: .LBB7_19: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_14: -; RV32-NEXT: fsh ft1, 20(sp) +; RV32-NEXT: .LBB7_20: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 20(sp) +; RV32-NEXT: bnez a0, .LBB7_22 +; RV32-NEXT: # %bb.21: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 9 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_23 +; RV32-NEXT: .LBB7_22: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_16: -; RV32-NEXT: fsh ft1, 18(sp) +; RV32-NEXT: .LBB7_23: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 18(sp) +; RV32-NEXT: bnez a0, .LBB7_25 +; RV32-NEXT: # %bb.24: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 8 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_26 +; RV32-NEXT: .LBB7_25: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_18 
-; RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_18: -; RV32-NEXT: fsh ft1, 16(sp) +; RV32-NEXT: .LBB7_26: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB7_28 +; RV32-NEXT: # %bb.27: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 7 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_29 +; RV32-NEXT: .LBB7_28: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_20: -; RV32-NEXT: fsh ft1, 14(sp) +; RV32-NEXT: .LBB7_29: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 14(sp) +; RV32-NEXT: bnez a0, .LBB7_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_32 +; RV32-NEXT: .LBB7_31: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_22: -; RV32-NEXT: fsh ft1, 12(sp) +; RV32-NEXT: .LBB7_32: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB7_34 +; RV32-NEXT: # %bb.33: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_35 +; RV32-NEXT: .LBB7_34: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_24: -; RV32-NEXT: fsh ft1, 10(sp) +; RV32-NEXT: .LBB7_35: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 10(sp) +; RV32-NEXT: bnez a0, .LBB7_37 +; RV32-NEXT: # %bb.36: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 
-; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_38 +; RV32-NEXT: .LBB7_37: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_26: -; RV32-NEXT: fsh ft1, 8(sp) +; RV32-NEXT: .LBB7_38: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB7_40 +; RV32-NEXT: # %bb.39: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB7_41 +; RV32-NEXT: .LBB7_40: +; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_28: -; RV32-NEXT: fsh ft1, 6(sp) +; RV32-NEXT: .LBB7_41: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 6(sp) +; RV32-NEXT: bnez a0, .LBB7_43 +; RV32-NEXT: # %bb.42: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_30 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.h ft1, ft0 -; RV32-NEXT: .LBB7_30: -; RV32-NEXT: fsh ft1, 4(sp) +; RV32-NEXT: j .LBB7_44 +; RV32-NEXT: .LBB7_43: ; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB7_44: ; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 4(sp) +; RV32-NEXT: bnez a0, .LBB7_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB7_47 +; RV32-NEXT: .LBB7_46: +; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB7_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.h ft1, ft0 -; 
RV32-NEXT: .LBB7_32: -; RV32-NEXT: fsh ft1, 2(sp) +; RV32-NEXT: .LBB7_47: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsh ft0, 2(sp) ; RV32-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; RV32-NEXT: vle16.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 @@ -896,164 +981,181 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 ; RV64-NEXT: feq.h a0, fa0, fa1 -; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB7_2 +; RV64-NEXT: bnez a0, .LBB7_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.h ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsh ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB7_4 ; RV64-NEXT: .LBB7_2: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 15 +; RV64-NEXT: j .LBB7_5 +; RV64-NEXT: .LBB7_3: +; RV64-NEXT: vsetvli zero, zero, e16,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsh ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB7_2 +; RV64-NEXT: .LBB7_4: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 15 +; RV64-NEXT: .LBB7_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_4: -; RV64-NEXT: fsh ft1, 30(sp) +; RV64-NEXT: fsh ft0, 30(sp) +; RV64-NEXT: bnez a0, .LBB7_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 14 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_6: -; RV64-NEXT: fsh ft1, 28(sp) +; RV64-NEXT: j .LBB7_8 +; RV64-NEXT: .LBB7_7: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 13 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 13 -; RV64-NEXT: 
vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: vslidedown.vi v26, v8, 14 ; RV64-NEXT: .LBB7_8: -; RV64-NEXT: fsh ft1, 26(sp) -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 12 ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 12 -; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: fsh ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB7_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 13 +; RV64-NEXT: j .LBB7_11 ; RV64-NEXT: .LBB7_10: -; RV64-NEXT: fsh ft1, 24(sp) ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 11 +; RV64-NEXT: vslidedown.vi v26, v8, 13 +; RV64-NEXT: .LBB7_11: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_12: -; RV64-NEXT: fsh ft1, 22(sp) +; RV64-NEXT: fsh ft0, 26(sp) +; RV64-NEXT: bnez a0, .LBB7_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 10 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_14: -; RV64-NEXT: fsh ft1, 20(sp) +; RV64-NEXT: vslidedown.vi v26, v10, 12 +; RV64-NEXT: j .LBB7_14 +; RV64-NEXT: .LBB7_13: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 9 +; RV64-NEXT: vslidedown.vi v26, v8, 12 +; RV64-NEXT: .LBB7_14: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: fsh ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB7_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 11 +; 
RV64-NEXT: j .LBB7_17 ; RV64-NEXT: .LBB7_16: -; RV64-NEXT: fsh ft1, 18(sp) ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 8 +; RV64-NEXT: vslidedown.vi v26, v8, 11 +; RV64-NEXT: .LBB7_17: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_18: -; RV64-NEXT: fsh ft1, 16(sp) +; RV64-NEXT: fsh ft0, 22(sp) +; RV64-NEXT: bnez a0, .LBB7_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 7 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_20: -; RV64-NEXT: fsh ft1, 14(sp) +; RV64-NEXT: vslidedown.vi v26, v10, 10 +; RV64-NEXT: j .LBB7_20 +; RV64-NEXT: .LBB7_19: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 6 +; RV64-NEXT: vslidedown.vi v26, v8, 10 +; RV64-NEXT: .LBB7_20: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v26 +; RV64-NEXT: fsh ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB7_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.h ft1, ft0 +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 9 +; RV64-NEXT: j .LBB7_23 ; RV64-NEXT: .LBB7_22: -; RV64-NEXT: fsh ft1, 12(sp) ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 5 +; RV64-NEXT: vslidedown.vi v26, v8, 9 +; RV64-NEXT: .LBB7_23: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 18(sp) +; RV64-NEXT: bnez a0, .LBB7_25 +; RV64-NEXT: # %bb.24: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 8 +; RV64-NEXT: j .LBB7_26 +; RV64-NEXT: .LBB7_25: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 8 +; RV64-NEXT: .LBB7_26: +; RV64-NEXT: 
vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB7_28 +; RV64-NEXT: # %bb.27: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 7 +; RV64-NEXT: j .LBB7_29 +; RV64-NEXT: .LBB7_28: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 7 +; RV64-NEXT: .LBB7_29: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 14(sp) +; RV64-NEXT: bnez a0, .LBB7_31 +; RV64-NEXT: # %bb.30: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 6 +; RV64-NEXT: j .LBB7_32 +; RV64-NEXT: .LBB7_31: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 6 +; RV64-NEXT: .LBB7_32: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB7_34 +; RV64-NEXT: # %bb.33: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 5 +; RV64-NEXT: j .LBB7_35 +; RV64-NEXT: .LBB7_34: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_24 -; RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_24: -; RV64-NEXT: fsh ft1, 10(sp) +; RV64-NEXT: .LBB7_35: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 10(sp) +; RV64-NEXT: bnez a0, .LBB7_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB7_38 +; RV64-NEXT: .LBB7_37: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_26: -; RV64-NEXT: fsh ft1, 8(sp) +; RV64-NEXT: .LBB7_38: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 8(sp) +; RV64-NEXT: bnez a0, .LBB7_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: vfmv.f.s 
ft0, v26 +; RV64-NEXT: j .LBB7_41 +; RV64-NEXT: .LBB7_40: +; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_28: -; RV64-NEXT: fsh ft1, 6(sp) +; RV64-NEXT: .LBB7_41: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 6(sp) +; RV64-NEXT: bnez a0, .LBB7_43 +; RV64-NEXT: # %bb.42: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_30: -; RV64-NEXT: fsh ft1, 4(sp) +; RV64-NEXT: j .LBB7_44 +; RV64-NEXT: .LBB7_43: ; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB7_44: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 4(sp) +; RV64-NEXT: bnez a0, .LBB7_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB7_47 +; RV64-NEXT: .LBB7_46: +; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB7_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.h ft1, ft0 -; RV64-NEXT: .LBB7_32: -; RV64-NEXT: fsh ft1, 2(sp) +; RV64-NEXT: .LBB7_47: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsh ft0, 2(sp) ; RV64-NEXT: vsetivli a0, 16, e16,m2,ta,mu ; RV64-NEXT: vle16.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -1069,20 +1171,22 @@ define <2 x float> @select_v2f32(i1 zeroext %c, <2 x float> %a, <2 x float> %b) { ; CHECK-LABEL: select_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft3, v25 -; 
CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft2, v25 ; CHECK-NEXT: bnez a0, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.s ft0, ft1 -; CHECK-NEXT: fmv.s ft2, ft3 +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: vsetivli a0, 1, e32,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB8_3 ; CHECK-NEXT: .LBB8_2: +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vsetivli a0, 1, e32,mf2,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB8_3: +; CHECK-NEXT: vfmv.f.s ft1, v25 ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu -; CHECK-NEXT: vfmv.v.f v8, ft2 +; CHECK-NEXT: vfmv.v.f v8, ft1 ; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: ret %v = select i1 %c, <2 x float> %a, <2 x float> %b @@ -1093,23 +1197,27 @@ ; CHECK-LABEL: selectcc_v2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: feq.s a0, fa0, fa1 +; CHECK-NEXT: bnez a0, .LBB9_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vsetivli a1, 1, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: j .LBB9_3 +; CHECK-NEXT: .LBB9_2: +; CHECK-NEXT: vsetivli a1, 1, e32,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB9_3: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: bnez a0, .LBB9_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.s ft0, ft1 -; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: vsetivli a1, 2, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.v.f v25, ft0 -; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: bnez a0, .LBB9_5 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: j .LBB9_6 +; CHECK-NEXT: .LBB9_5: +; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB9_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.s ft0, ft1 -; CHECK-NEXT: .LBB9_4: +; CHECK-NEXT: .LBB9_6: ; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; 
CHECK-NEXT: vmv1r.v v8, v25 @@ -1124,44 +1232,49 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB10_2 +; CHECK-NEXT: bnez a0, .LBB10_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.s ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsw ft0, 0(sp) +; CHECK-NEXT: beqz a0, .LBB10_4 ; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: j .LBB10_5 +; CHECK-NEXT: .LBB10_3: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsw ft0, 0(sp) +; CHECK-NEXT: bnez a0, .LBB10_2 +; CHECK-NEXT: .LBB10_4: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: .LBB10_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB10_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB10_4: -; CHECK-NEXT: fsw ft1, 12(sp) +; CHECK-NEXT: fsw ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB10_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB10_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB10_6: -; CHECK-NEXT: fsw ft1, 8(sp) +; CHECK-NEXT: j .LBB10_8 +; CHECK-NEXT: .LBB10_7: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB10_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsw ft0, 8(sp) +; CHECK-NEXT: bnez a0, .LBB10_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu +; CHECK-NEXT: 
vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB10_11 +; CHECK-NEXT: .LBB10_10: +; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB10_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB10_8: -; CHECK-NEXT: fsw ft1, 4(sp) +; CHECK-NEXT: .LBB10_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsw ft0, 4(sp) ; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu ; CHECK-NEXT: vle32.v v8, (sp) ; CHECK-NEXT: addi sp, sp, 16 @@ -1176,44 +1289,49 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: feq.s a0, fa0, fa1 -; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB11_2 +; CHECK-NEXT: bnez a0, .LBB11_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.s ft0, ft1 +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: fsw ft0, 0(sp) +; CHECK-NEXT: beqz a0, .LBB11_4 ; CHECK-NEXT: .LBB11_2: +; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 3 +; CHECK-NEXT: j .LBB11_5 +; CHECK-NEXT: .LBB11_3: +; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: fsw ft0, 0(sp) +; CHECK-NEXT: bnez a0, .LBB11_2 +; CHECK-NEXT: .LBB11_4: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 +; CHECK-NEXT: .LBB11_5: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 3 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB11_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB11_4: -; CHECK-NEXT: fsw ft1, 12(sp) +; CHECK-NEXT: fsw ft0, 12(sp) +; CHECK-NEXT: bnez a0, .LBB11_7 +; CHECK-NEXT: # %bb.6: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 -; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 2 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: 
bnez a0, .LBB11_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB11_6: -; CHECK-NEXT: fsw ft1, 8(sp) +; CHECK-NEXT: j .LBB11_8 +; CHECK-NEXT: .LBB11_7: ; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu -; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: vslidedown.vi v25, v8, 2 +; CHECK-NEXT: .LBB11_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsw ft0, 8(sp) +; CHECK-NEXT: bnez a0, .LBB11_10 +; CHECK-NEXT: # %bb.9: +; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB11_11 +; CHECK-NEXT: .LBB11_10: +; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 -; CHECK-NEXT: bnez a0, .LBB11_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: fmv.s ft1, ft0 -; CHECK-NEXT: .LBB11_8: -; CHECK-NEXT: fsw ft1, 4(sp) +; CHECK-NEXT: .LBB11_11: +; CHECK-NEXT: vfmv.f.s ft0, v25 +; CHECK-NEXT: fsw ft0, 4(sp) ; CHECK-NEXT: vsetivli a0, 4, e32,m1,ta,mu ; CHECK-NEXT: vle32.v v8, (sp) ; CHECK-NEXT: addi sp, sp, 16 @@ -1235,84 +1353,93 @@ ; RV32-NEXT: addi s0, sp, 64 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB12_2 +; RV32-NEXT: bnez a0, .LBB12_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB12_4 ; RV32-NEXT: .LBB12_2: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 7 +; RV32-NEXT: j .LBB12_5 +; RV32-NEXT: .LBB12_3: +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB12_2 +; RV32-NEXT: .LBB12_4: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 7 +; RV32-NEXT: .LBB12_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, 
v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_4: -; RV32-NEXT: fsw ft1, 28(sp) +; RV32-NEXT: fsw ft0, 28(sp) +; RV32-NEXT: bnez a0, .LBB12_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB12_8 +; RV32-NEXT: .LBB12_7: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_6: -; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: .LBB12_8: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB12_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB12_11 +; RV32-NEXT: .LBB12_10: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_8: -; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: .LBB12_11: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 20(sp) +; RV32-NEXT: bnez a0, .LBB12_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB12_14 +; RV32-NEXT: .LBB12_13: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_10: -; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: .LBB12_14: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB12_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: 
vslidedown.vi v26, v10, 3 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB12_17 +; RV32-NEXT: .LBB12_16: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_12: -; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: .LBB12_17: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB12_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_14: -; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: j .LBB12_20 +; RV32-NEXT: .LBB12_19: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB12_20: ; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB12_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB12_23 +; RV32-NEXT: .LBB12_22: +; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB12_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB12_16: -; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: .LBB12_23: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 4(sp) ; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV32-NEXT: vle32.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 @@ -1332,84 +1459,93 @@ ; RV64-NEXT: addi s0, sp, 64 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB12_2 +; RV64-NEXT: bnez a0, 
.LBB12_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB12_4 ; RV64-NEXT: .LBB12_2: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 7 +; RV64-NEXT: j .LBB12_5 +; RV64-NEXT: .LBB12_3: +; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB12_2 +; RV64-NEXT: .LBB12_4: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 7 +; RV64-NEXT: .LBB12_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_4: -; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: fsw ft0, 28(sp) +; RV64-NEXT: bnez a0, .LBB12_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB12_8 +; RV64-NEXT: .LBB12_7: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_6: -; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: .LBB12_8: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB12_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB12_11 +; RV64-NEXT: .LBB12_10: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_8: -; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: .LBB12_11: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 
20(sp) +; RV64-NEXT: bnez a0, .LBB12_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB12_14 +; RV64-NEXT: .LBB12_13: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_10: -; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: .LBB12_14: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB12_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB12_17 +; RV64-NEXT: .LBB12_16: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_12: -; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: .LBB12_17: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB12_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_14: -; RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: j .LBB12_20 +; RV64-NEXT: .LBB12_19: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB12_20: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 8(sp) +; RV64-NEXT: bnez a0, .LBB12_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB12_23 +; RV64-NEXT: .LBB12_22: +; RV64-NEXT: vsetivli a0, 1, 
e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB12_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB12_16: -; RV64-NEXT: fsw ft1, 4(sp) +; RV64-NEXT: .LBB12_23: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 4(sp) ; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV64-NEXT: vle32.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -1434,84 +1570,93 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 ; RV32-NEXT: feq.s a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB13_2 +; RV32-NEXT: bnez a0, .LBB13_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB13_4 ; RV32-NEXT: .LBB13_2: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 7 +; RV32-NEXT: j .LBB13_5 +; RV32-NEXT: .LBB13_3: +; RV32-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB13_2 +; RV32-NEXT: .LBB13_4: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 7 +; RV32-NEXT: .LBB13_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_4: -; RV32-NEXT: fsw ft1, 28(sp) +; RV32-NEXT: fsw ft0, 28(sp) +; RV32-NEXT: bnez a0, .LBB13_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB13_8 +; RV32-NEXT: .LBB13_7: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.s ft1, ft0 
-; RV32-NEXT: .LBB13_6: -; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: .LBB13_8: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB13_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB13_11 +; RV32-NEXT: .LBB13_10: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_8: -; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: .LBB13_11: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 20(sp) +; RV32-NEXT: bnez a0, .LBB13_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB13_14 +; RV32-NEXT: .LBB13_13: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_10: -; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: .LBB13_14: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB13_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 -; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: j .LBB13_17 +; RV32-NEXT: .LBB13_16: +; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_12: -; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: .LBB13_17: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB13_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: vfmv.f.s ft0, v26 -; 
RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_14: -; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: j .LBB13_20 +; RV32-NEXT: .LBB13_19: ; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB13_20: ; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB13_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB13_23 +; RV32-NEXT: .LBB13_22: +; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB13_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB13_16: -; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: .LBB13_23: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsw ft0, 4(sp) ; RV32-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV32-NEXT: vle32.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 @@ -1532,84 +1677,93 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 ; RV64-NEXT: feq.s a0, fa0, fa1 -; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB13_2 +; RV64-NEXT: bnez a0, .LBB13_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB13_4 ; RV64-NEXT: .LBB13_2: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 7 +; RV64-NEXT: j .LBB13_5 +; RV64-NEXT: .LBB13_3: +; RV64-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB13_2 +; RV64-NEXT: .LBB13_4: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 7 +; RV64-NEXT: 
.LBB13_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_4: -; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: fsw ft0, 28(sp) +; RV64-NEXT: bnez a0, .LBB13_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB13_8 +; RV64-NEXT: .LBB13_7: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_6: -; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: .LBB13_8: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB13_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB13_11 +; RV64-NEXT: .LBB13_10: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_8: -; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: .LBB13_11: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 20(sp) +; RV64-NEXT: bnez a0, .LBB13_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB13_14 +; RV64-NEXT: .LBB13_13: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_10: -; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: .LBB13_14: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB13_16 +; RV64-NEXT: 
# %bb.15: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 -; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: j .LBB13_17 +; RV64-NEXT: .LBB13_16: +; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_12: -; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: .LBB13_17: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB13_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_14: -; RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: j .LBB13_20 +; RV64-NEXT: .LBB13_19: ; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB13_20: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 8(sp) +; RV64-NEXT: bnez a0, .LBB13_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB13_23 +; RV64-NEXT: .LBB13_22: +; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB13_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB13_16: -; RV64-NEXT: fsw ft1, 4(sp) +; RV64-NEXT: .LBB13_23: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsw ft0, 4(sp) ; RV64-NEXT: vsetivli a0, 8, e32,m2,ta,mu ; RV64-NEXT: vle32.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -1634,164 +1788,181 @@ ; RV32-NEXT: addi s0, sp, 128 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v12 -; RV32-NEXT: 
vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB14_2 +; RV32-NEXT: bnez a0, .LBB14_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v12 +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB14_4 ; RV32-NEXT: .LBB14_2: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 15 +; RV32-NEXT: j .LBB14_5 +; RV32-NEXT: .LBB14_3: +; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB14_2 +; RV32-NEXT: .LBB14_4: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 15 +; RV32-NEXT: .LBB14_5: ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_4: -; RV32-NEXT: fsw ft1, 60(sp) +; RV32-NEXT: fsw ft0, 60(sp) +; RV32-NEXT: bnez a0, .LBB14_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 14 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_8 +; RV32-NEXT: .LBB14_7: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_6: -; RV32-NEXT: fsw ft1, 56(sp) +; RV32-NEXT: .LBB14_8: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 56(sp) +; RV32-NEXT: bnez a0, .LBB14_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 13 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_11 +; RV32-NEXT: .LBB14_10: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_8: -; RV32-NEXT: fsw ft1, 52(sp) +; 
RV32-NEXT: .LBB14_11: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 52(sp) +; RV32-NEXT: bnez a0, .LBB14_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 12 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_14 +; RV32-NEXT: .LBB14_13: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 12 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_10: -; RV32-NEXT: fsw ft1, 48(sp) +; RV32-NEXT: .LBB14_14: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 48(sp) +; RV32-NEXT: bnez a0, .LBB14_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 11 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_17 +; RV32-NEXT: .LBB14_16: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_12: -; RV32-NEXT: fsw ft1, 44(sp) +; RV32-NEXT: .LBB14_17: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 44(sp) +; RV32-NEXT: bnez a0, .LBB14_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 10 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_20 +; RV32-NEXT: .LBB14_19: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_14: -; RV32-NEXT: fsw ft1, 40(sp) +; RV32-NEXT: .LBB14_20: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB14_22 +; RV32-NEXT: # %bb.21: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 9 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_23 +; RV32-NEXT: .LBB14_22: +; 
RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_16: -; RV32-NEXT: fsw ft1, 36(sp) +; RV32-NEXT: .LBB14_23: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 36(sp) +; RV32-NEXT: bnez a0, .LBB14_25 +; RV32-NEXT: # %bb.24: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 8 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_26 +; RV32-NEXT: .LBB14_25: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_18: -; RV32-NEXT: fsw ft1, 32(sp) +; RV32-NEXT: .LBB14_26: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 32(sp) +; RV32-NEXT: bnez a0, .LBB14_28 +; RV32-NEXT: # %bb.27: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_29 +; RV32-NEXT: .LBB14_28: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_20: -; RV32-NEXT: fsw ft1, 28(sp) +; RV32-NEXT: .LBB14_29: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 28(sp) +; RV32-NEXT: bnez a0, .LBB14_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_32 +; RV32-NEXT: .LBB14_31: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_22: -; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: .LBB14_32: +; RV32-NEXT: vfmv.f.s ft0, v28 +; 
RV32-NEXT: fsw ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB14_34 +; RV32-NEXT: # %bb.33: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_35 +; RV32-NEXT: .LBB14_34: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_24: -; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: .LBB14_35: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 20(sp) +; RV32-NEXT: bnez a0, .LBB14_37 +; RV32-NEXT: # %bb.36: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_38 +; RV32-NEXT: .LBB14_37: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_26: -; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: .LBB14_38: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB14_40 +; RV32-NEXT: # %bb.39: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB14_41 +; RV32-NEXT: .LBB14_40: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_28: -; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: .LBB14_41: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB14_43 +; RV32-NEXT: # %bb.42: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_30 -; 
RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_30: -; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: j .LBB14_44 +; RV32-NEXT: .LBB14_43: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: .LBB14_44: ; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB14_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: j .LBB14_47 +; RV32-NEXT: .LBB14_46: +; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB14_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB14_32: -; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: .LBB14_47: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 4(sp) ; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; RV32-NEXT: vle32.v v8, (sp) ; RV32-NEXT: addi sp, s0, -128 @@ -1811,164 +1982,181 @@ ; RV64-NEXT: addi s0, sp, 128 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v12 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB14_2 +; RV64-NEXT: bnez a0, .LBB14_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v12 +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB14_4 ; RV64-NEXT: .LBB14_2: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v8, 15 +; RV64-NEXT: j .LBB14_5 +; RV64-NEXT: .LBB14_3: +; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB14_2 +; RV64-NEXT: .LBB14_4: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 15 +; RV64-NEXT: .LBB14_5: ; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, 
v28 -; RV64-NEXT: bnez a0, .LBB14_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_4: -; RV64-NEXT: fsw ft1, 60(sp) +; RV64-NEXT: fsw ft0, 60(sp) +; RV64-NEXT: bnez a0, .LBB14_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 14 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_8 +; RV64-NEXT: .LBB14_7: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_6: -; RV64-NEXT: fsw ft1, 56(sp) +; RV64-NEXT: .LBB14_8: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB14_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 13 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_11 +; RV64-NEXT: .LBB14_10: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 13 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_8: -; RV64-NEXT: fsw ft1, 52(sp) +; RV64-NEXT: .LBB14_11: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 52(sp) +; RV64-NEXT: bnez a0, .LBB14_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 12 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_14 +; RV64-NEXT: .LBB14_13: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 12 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_10: -; RV64-NEXT: fsw ft1, 48(sp) +; RV64-NEXT: .LBB14_14: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 48(sp) +; RV64-NEXT: bnez a0, .LBB14_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 11 -; 
RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_17 +; RV64-NEXT: .LBB14_16: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_12: -; RV64-NEXT: fsw ft1, 44(sp) +; RV64-NEXT: .LBB14_17: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 44(sp) +; RV64-NEXT: bnez a0, .LBB14_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 10 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_20 +; RV64-NEXT: .LBB14_19: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_14: -; RV64-NEXT: fsw ft1, 40(sp) +; RV64-NEXT: .LBB14_20: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB14_22 +; RV64-NEXT: # %bb.21: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 9 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_23 +; RV64-NEXT: .LBB14_22: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_16: -; RV64-NEXT: fsw ft1, 36(sp) +; RV64-NEXT: .LBB14_23: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 36(sp) +; RV64-NEXT: bnez a0, .LBB14_25 +; RV64-NEXT: # %bb.24: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 8 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_26 +; RV64-NEXT: .LBB14_25: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_18: -; 
RV64-NEXT: fsw ft1, 32(sp) +; RV64-NEXT: .LBB14_26: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 32(sp) +; RV64-NEXT: bnez a0, .LBB14_28 +; RV64-NEXT: # %bb.27: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 7 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_29 +; RV64-NEXT: .LBB14_28: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_20: -; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: .LBB14_29: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 28(sp) +; RV64-NEXT: bnez a0, .LBB14_31 +; RV64-NEXT: # %bb.30: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_32 +; RV64-NEXT: .LBB14_31: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_22: -; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: .LBB14_32: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB14_34 +; RV64-NEXT: # %bb.33: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_35 +; RV64-NEXT: .LBB14_34: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_24 -; RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_24: -; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: .LBB14_35: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 20(sp) +; RV64-NEXT: bnez a0, .LBB14_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_38 +; 
RV64-NEXT: .LBB14_37: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_26: -; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: .LBB14_38: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB14_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB14_41 +; RV64-NEXT: .LBB14_40: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_28: -; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: .LBB14_41: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB14_43 +; RV64-NEXT: # %bb.42: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_30: -; RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: j .LBB14_44 +; RV64-NEXT: .LBB14_43: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: .LBB14_44: ; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 8(sp) +; RV64-NEXT: bnez a0, .LBB14_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: j .LBB14_47 +; RV64-NEXT: .LBB14_46: +; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB14_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB14_32: -; RV64-NEXT: fsw 
ft1, 4(sp) +; RV64-NEXT: .LBB14_47: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 4(sp) ; RV64-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; RV64-NEXT: vle32.v v8, (sp) ; RV64-NEXT: addi sp, s0, -128 @@ -1993,164 +2181,181 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -64 ; RV32-NEXT: feq.s a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v12 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB15_2 +; RV32-NEXT: bnez a0, .LBB15_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.s ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v12 +; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB15_4 ; RV32-NEXT: .LBB15_2: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 15 +; RV32-NEXT: j .LBB15_5 +; RV32-NEXT: .LBB15_3: +; RV32-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsw ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB15_2 +; RV32-NEXT: .LBB15_4: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 15 +; RV32-NEXT: .LBB15_5: ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_4: -; RV32-NEXT: fsw ft1, 60(sp) +; RV32-NEXT: fsw ft0, 60(sp) +; RV32-NEXT: bnez a0, .LBB15_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 14 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_6: -; RV32-NEXT: fsw ft1, 56(sp) +; RV32-NEXT: j .LBB15_8 +; RV32-NEXT: .LBB15_7: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 13 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v28 -; 
RV32-NEXT: bnez a0, .LBB15_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: vslidedown.vi v28, v8, 14 ; RV32-NEXT: .LBB15_8: -; RV32-NEXT: fsw ft1, 52(sp) -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 12 ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 12 -; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: fsw ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB15_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 13 +; RV32-NEXT: j .LBB15_11 ; RV32-NEXT: .LBB15_10: -; RV32-NEXT: fsw ft1, 48(sp) ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 11 +; RV32-NEXT: vslidedown.vi v28, v8, 13 +; RV32-NEXT: .LBB15_11: ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_12: -; RV32-NEXT: fsw ft1, 44(sp) +; RV32-NEXT: fsw ft0, 52(sp) +; RV32-NEXT: bnez a0, .LBB15_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 10 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_14: -; RV32-NEXT: fsw ft1, 40(sp) +; RV32-NEXT: vslidedown.vi v28, v12, 12 +; RV32-NEXT: j .LBB15_14 +; RV32-NEXT: .LBB15_13: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 9 +; RV32-NEXT: vslidedown.vi v28, v8, 12 +; RV32-NEXT: .LBB15_14: ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v28 +; RV32-NEXT: fsw ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB15_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.s ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 11 +; RV32-NEXT: j 
.LBB15_17 ; RV32-NEXT: .LBB15_16: -; RV32-NEXT: fsw ft1, 36(sp) ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 8 +; RV32-NEXT: vslidedown.vi v28, v8, 11 +; RV32-NEXT: .LBB15_17: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 44(sp) +; RV32-NEXT: bnez a0, .LBB15_19 +; RV32-NEXT: # %bb.18: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 10 +; RV32-NEXT: j .LBB15_20 +; RV32-NEXT: .LBB15_19: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 10 +; RV32-NEXT: .LBB15_20: ; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB15_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 9 +; RV32-NEXT: j .LBB15_23 +; RV32-NEXT: .LBB15_22: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 9 +; RV32-NEXT: .LBB15_23: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 36(sp) +; RV32-NEXT: bnez a0, .LBB15_25 +; RV32-NEXT: # %bb.24: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 8 +; RV32-NEXT: j .LBB15_26 +; RV32-NEXT: .LBB15_25: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_18: -; RV32-NEXT: fsw ft1, 32(sp) +; RV32-NEXT: .LBB15_26: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 32(sp) +; RV32-NEXT: bnez a0, .LBB15_28 +; RV32-NEXT: # %bb.27: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB15_29 +; RV32-NEXT: .LBB15_28: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_20: -; RV32-NEXT: fsw ft1, 
28(sp) +; RV32-NEXT: .LBB15_29: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 28(sp) +; RV32-NEXT: bnez a0, .LBB15_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB15_32 +; RV32-NEXT: .LBB15_31: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_22: -; RV32-NEXT: fsw ft1, 24(sp) +; RV32-NEXT: .LBB15_32: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB15_34 +; RV32-NEXT: # %bb.33: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB15_35 +; RV32-NEXT: .LBB15_34: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_24: -; RV32-NEXT: fsw ft1, 20(sp) +; RV32-NEXT: .LBB15_35: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 20(sp) +; RV32-NEXT: bnez a0, .LBB15_37 +; RV32-NEXT: # %bb.36: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB15_38 +; RV32-NEXT: .LBB15_37: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_26: -; RV32-NEXT: fsw ft1, 16(sp) +; RV32-NEXT: .LBB15_38: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB15_40 +; RV32-NEXT: # %bb.39: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB15_41 +; RV32-NEXT: 
.LBB15_40: +; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_28: -; RV32-NEXT: fsw ft1, 12(sp) +; RV32-NEXT: .LBB15_41: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 12(sp) +; RV32-NEXT: bnez a0, .LBB15_43 +; RV32-NEXT: # %bb.42: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_30 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_30: -; RV32-NEXT: fsw ft1, 8(sp) +; RV32-NEXT: j .LBB15_44 +; RV32-NEXT: .LBB15_43: ; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: .LBB15_44: ; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 8(sp) +; RV32-NEXT: bnez a0, .LBB15_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: j .LBB15_47 +; RV32-NEXT: .LBB15_46: +; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB15_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.s ft1, ft0 -; RV32-NEXT: .LBB15_32: -; RV32-NEXT: fsw ft1, 4(sp) +; RV32-NEXT: .LBB15_47: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsw ft0, 4(sp) ; RV32-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; RV32-NEXT: vle32.v v8, (sp) ; RV32-NEXT: addi sp, s0, -128 @@ -2171,164 +2376,181 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: feq.s a0, fa0, fa1 -; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v12 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB15_2 +; RV64-NEXT: bnez a0, .LBB15_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.s ft0, ft1 +; RV64-NEXT: vsetvli zero, 
zero, e32,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v12 +; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB15_4 ; RV64-NEXT: .LBB15_2: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v8, 15 +; RV64-NEXT: j .LBB15_5 +; RV64-NEXT: .LBB15_3: +; RV64-NEXT: vsetvli zero, zero, e32,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsw ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB15_2 +; RV64-NEXT: .LBB15_4: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 15 +; RV64-NEXT: .LBB15_5: ; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_4: -; RV64-NEXT: fsw ft1, 60(sp) +; RV64-NEXT: fsw ft0, 60(sp) +; RV64-NEXT: bnez a0, .LBB15_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 14 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_8 +; RV64-NEXT: .LBB15_7: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_6: -; RV64-NEXT: fsw ft1, 56(sp) +; RV64-NEXT: .LBB15_8: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB15_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 13 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_11 +; RV64-NEXT: .LBB15_10: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 13 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_8: -; RV64-NEXT: fsw ft1, 52(sp) +; RV64-NEXT: .LBB15_11: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 52(sp) +; RV64-NEXT: bnez a0, .LBB15_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: 
vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 12 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_14 +; RV64-NEXT: .LBB15_13: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 12 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_10: -; RV64-NEXT: fsw ft1, 48(sp) +; RV64-NEXT: .LBB15_14: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 48(sp) +; RV64-NEXT: bnez a0, .LBB15_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 11 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_17 +; RV64-NEXT: .LBB15_16: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_12: -; RV64-NEXT: fsw ft1, 44(sp) +; RV64-NEXT: .LBB15_17: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 44(sp) +; RV64-NEXT: bnez a0, .LBB15_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 10 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_20 +; RV64-NEXT: .LBB15_19: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_14: -; RV64-NEXT: fsw ft1, 40(sp) +; RV64-NEXT: .LBB15_20: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB15_22 +; RV64-NEXT: # %bb.21: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 9 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_23 +; RV64-NEXT: .LBB15_22: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_16 -; 
RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_16: -; RV64-NEXT: fsw ft1, 36(sp) +; RV64-NEXT: .LBB15_23: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 36(sp) +; RV64-NEXT: bnez a0, .LBB15_25 +; RV64-NEXT: # %bb.24: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 8 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_26 +; RV64-NEXT: .LBB15_25: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_18: -; RV64-NEXT: fsw ft1, 32(sp) +; RV64-NEXT: .LBB15_26: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 32(sp) +; RV64-NEXT: bnez a0, .LBB15_28 +; RV64-NEXT: # %bb.27: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 7 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_29 +; RV64-NEXT: .LBB15_28: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_20: -; RV64-NEXT: fsw ft1, 28(sp) +; RV64-NEXT: .LBB15_29: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 28(sp) +; RV64-NEXT: bnez a0, .LBB15_31 +; RV64-NEXT: # %bb.30: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_32 +; RV64-NEXT: .LBB15_31: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_22: -; RV64-NEXT: fsw ft1, 24(sp) +; RV64-NEXT: .LBB15_32: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB15_34 +; RV64-NEXT: # %bb.33: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: 
vslidedown.vi v28, v12, 5 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_35 +; RV64-NEXT: .LBB15_34: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_24 -; RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_24: -; RV64-NEXT: fsw ft1, 20(sp) +; RV64-NEXT: .LBB15_35: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 20(sp) +; RV64-NEXT: bnez a0, .LBB15_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_38 +; RV64-NEXT: .LBB15_37: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_26: -; RV64-NEXT: fsw ft1, 16(sp) +; RV64-NEXT: .LBB15_38: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB15_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB15_41 +; RV64-NEXT: .LBB15_40: +; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_28: -; RV64-NEXT: fsw ft1, 12(sp) +; RV64-NEXT: .LBB15_41: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 12(sp) +; RV64-NEXT: bnez a0, .LBB15_43 +; RV64-NEXT: # %bb.42: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_30: -; RV64-NEXT: fsw ft1, 8(sp) +; RV64-NEXT: j .LBB15_44 +; RV64-NEXT: 
.LBB15_43: ; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu -; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: .LBB15_44: ; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 8(sp) +; RV64-NEXT: bnez a0, .LBB15_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: j .LBB15_47 +; RV64-NEXT: .LBB15_46: +; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB15_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.s ft1, ft0 -; RV64-NEXT: .LBB15_32: -; RV64-NEXT: fsw ft1, 4(sp) +; RV64-NEXT: .LBB15_47: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsw ft0, 4(sp) ; RV64-NEXT: vsetivli a0, 16, e32,m4,ta,mu ; RV64-NEXT: vle32.v v8, (sp) ; RV64-NEXT: addi sp, s0, -128 @@ -2344,20 +2566,22 @@ define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: select_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu -; CHECK-NEXT: vfmv.f.s ft1, v9 -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft3, v25 -; CHECK-NEXT: vslidedown.vi v25, v8, 1 -; CHECK-NEXT: vfmv.f.s ft2, v25 ; CHECK-NEXT: bnez a0, .LBB16_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.d ft0, ft1 -; CHECK-NEXT: fmv.d ft2, ft3 +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v9, 1 +; CHECK-NEXT: j .LBB16_3 ; CHECK-NEXT: .LBB16_2: +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vsetivli a0, 1, e64,m1,ta,mu +; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB16_3: +; CHECK-NEXT: vfmv.f.s ft1, v25 ; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu -; CHECK-NEXT: vfmv.v.f v8, ft2 +; CHECK-NEXT: vfmv.v.f v8, ft1 ; CHECK-NEXT: vfmv.s.f v8, ft0 ; CHECK-NEXT: ret %v = select i1 %c, <2 x 
double> %a, <2 x double> %b @@ -2368,23 +2592,27 @@ ; CHECK-LABEL: selectcc_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: feq.d a0, fa0, fa1 +; CHECK-NEXT: bnez a0, .LBB17_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 -; CHECK-NEXT: vfmv.f.s ft1, v25 +; CHECK-NEXT: j .LBB17_3 +; CHECK-NEXT: .LBB17_2: +; CHECK-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 +; CHECK-NEXT: .LBB17_3: ; CHECK-NEXT: vfmv.f.s ft0, v25 -; CHECK-NEXT: bnez a0, .LBB17_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: fmv.d ft0, ft1 -; CHECK-NEXT: .LBB17_2: ; CHECK-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; CHECK-NEXT: vfmv.v.f v25, ft0 -; CHECK-NEXT: vfmv.f.s ft1, v9 +; CHECK-NEXT: bnez a0, .LBB17_5 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu +; CHECK-NEXT: vfmv.f.s ft0, v9 +; CHECK-NEXT: j .LBB17_6 +; CHECK-NEXT: .LBB17_5: +; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: bnez a0, .LBB17_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: fmv.d ft0, ft1 -; CHECK-NEXT: .LBB17_4: +; CHECK-NEXT: .LBB17_6: ; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vmv1r.v v8, v25 @@ -2406,44 +2634,49 @@ ; RV32-NEXT: addi s0, sp, 64 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 -; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB18_2 +; RV32-NEXT: bnez a0, .LBB18_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB18_4 ; RV32-NEXT: .LBB18_2: +; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: j .LBB18_5 +; RV32-NEXT: .LBB18_3: +; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB18_2 +; 
RV32-NEXT: .LBB18_4: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: .LBB18_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB18_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB18_4: -; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB18_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB18_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB18_6: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: j .LBB18_8 +; RV32-NEXT: .LBB18_7: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB18_8: ; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsd ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB18_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB18_11 +; RV32-NEXT: .LBB18_10: +; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB18_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB18_8: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: .LBB18_11: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 @@ -2463,44 +2696,49 @@ ; RV64-NEXT: addi s0, sp, 64 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 -; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB18_2 +; RV64-NEXT: bnez a0, .LBB18_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; 
RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB18_4 ; RV64-NEXT: .LBB18_2: +; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 3 +; RV64-NEXT: j .LBB18_5 +; RV64-NEXT: .LBB18_3: +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB18_2 +; RV64-NEXT: .LBB18_4: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 +; RV64-NEXT: .LBB18_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB18_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB18_4: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB18_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB18_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB18_6: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB18_8 +; RV64-NEXT: .LBB18_7: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB18_8: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsd ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB18_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB18_11 +; RV64-NEXT: .LBB18_10: +; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB18_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB18_8: -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: .LBB18_11: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: 
vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -2525,44 +2763,49 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -32 ; RV32-NEXT: feq.d a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v10 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB19_2 +; RV32-NEXT: bnez a0, .LBB19_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v10 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB19_4 ; RV32-NEXT: .LBB19_2: +; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV32-NEXT: vslidedown.vi v26, v8, 3 +; RV32-NEXT: j .LBB19_5 +; RV32-NEXT: .LBB19_3: +; RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB19_2 +; RV32-NEXT: .LBB19_4: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 +; RV32-NEXT: .LBB19_5: ; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB19_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB19_4: -; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB19_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 -; RV32-NEXT: vfmv.f.s ft0, v26 -; RV32-NEXT: vslidedown.vi v26, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB19_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB19_6: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: j .LBB19_8 +; RV32-NEXT: .LBB19_7: ; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu -; RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: vslidedown.vi v26, v8, 2 +; RV32-NEXT: .LBB19_8: ; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsd ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB19_10 +; RV32-NEXT: # %bb.9: +; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu +; 
RV32-NEXT: vslidedown.vi v26, v10, 1 +; RV32-NEXT: j .LBB19_11 +; RV32-NEXT: .LBB19_10: +; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v26 -; RV32-NEXT: bnez a0, .LBB19_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB19_8: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: .LBB19_11: +; RV32-NEXT: vfmv.f.s ft0, v26 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -64 @@ -2583,44 +2826,49 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -32 ; RV64-NEXT: feq.d a0, fa0, fa1 -; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v10 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB19_2 +; RV64-NEXT: bnez a0, .LBB19_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v10 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB19_4 ; RV64-NEXT: .LBB19_2: +; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v8, 3 +; RV64-NEXT: j .LBB19_5 +; RV64-NEXT: .LBB19_3: +; RV64-NEXT: vsetvli zero, zero, e64,m2,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB19_2 +; RV64-NEXT: .LBB19_4: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 +; RV64-NEXT: .LBB19_5: ; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB19_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB19_4: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB19_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 -; RV64-NEXT: vfmv.f.s ft0, v26 -; RV64-NEXT: vslidedown.vi v26, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB19_6 -; RV64-NEXT: # %bb.5: -; 
RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB19_6: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB19_8 +; RV64-NEXT: .LBB19_7: ; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu -; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: vslidedown.vi v26, v8, 2 +; RV64-NEXT: .LBB19_8: ; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsd ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB19_10 +; RV64-NEXT: # %bb.9: +; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu +; RV64-NEXT: vslidedown.vi v26, v10, 1 +; RV64-NEXT: j .LBB19_11 +; RV64-NEXT: .LBB19_10: +; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v26 -; RV64-NEXT: bnez a0, .LBB19_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB19_8: -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: .LBB19_11: +; RV64-NEXT: vfmv.f.s ft0, v26 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: vsetivli a0, 4, e64,m2,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -64 @@ -2645,84 +2893,93 @@ ; RV32-NEXT: addi s0, sp, 128 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -64 -; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v12 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB20_2 +; RV32-NEXT: bnez a0, .LBB20_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v12 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB20_4 ; RV32-NEXT: .LBB20_2: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 7 +; RV32-NEXT: j .LBB20_5 +; RV32-NEXT: .LBB20_3: +; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB20_2 +; RV32-NEXT: .LBB20_4: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 +; RV32-NEXT: .LBB20_5: ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_4 
-; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_4: -; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: fsd ft0, 56(sp) +; RV32-NEXT: bnez a0, .LBB20_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB20_8 +; RV32-NEXT: .LBB20_7: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_6: -; RV32-NEXT: fsd ft1, 48(sp) +; RV32-NEXT: .LBB20_8: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 48(sp) +; RV32-NEXT: bnez a0, .LBB20_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB20_11 +; RV32-NEXT: .LBB20_10: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_8: -; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: .LBB20_11: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB20_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB20_14 +; RV32-NEXT: .LBB20_13: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_10: -; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: .LBB20_14: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 32(sp) +; RV32-NEXT: bnez a0, .LBB20_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j 
.LBB20_17 +; RV32-NEXT: .LBB20_16: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_12: -; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: .LBB20_17: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB20_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_14: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: j .LBB20_20 +; RV32-NEXT: .LBB20_19: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: .LBB20_20: ; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB20_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: j .LBB20_23 +; RV32-NEXT: .LBB20_22: +; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB20_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB20_16: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: .LBB20_23: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -128 @@ -2742,84 +2999,93 @@ ; RV64-NEXT: addi s0, sp, 128 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -64 -; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v12 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB20_2 +; RV64-NEXT: bnez a0, .LBB20_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; 
RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v12 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB20_4 ; RV64-NEXT: .LBB20_2: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v8, 7 +; RV64-NEXT: j .LBB20_5 +; RV64-NEXT: .LBB20_3: +; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB20_2 +; RV64-NEXT: .LBB20_4: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 7 +; RV64-NEXT: .LBB20_5: ; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_4: -; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: fsd ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB20_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB20_8 +; RV64-NEXT: .LBB20_7: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_6: -; RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: .LBB20_8: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 48(sp) +; RV64-NEXT: bnez a0, .LBB20_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB20_11 +; RV64-NEXT: .LBB20_10: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_8: -; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: .LBB20_11: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB20_13 +; RV64-NEXT: # %bb.12: ; 
RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB20_14 +; RV64-NEXT: .LBB20_13: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_10: -; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: .LBB20_14: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 32(sp) +; RV64-NEXT: bnez a0, .LBB20_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB20_17 +; RV64-NEXT: .LBB20_16: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_12: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: .LBB20_17: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB20_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_14: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB20_20 +; RV64-NEXT: .LBB20_19: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu -; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: .LBB20_20: ; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB20_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: j .LBB20_23 +; RV64-NEXT: .LBB20_22: +; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: vfmv.f.s 
ft1, v28 -; RV64-NEXT: bnez a0, .LBB20_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB20_16: -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: .LBB20_23: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -128 @@ -2844,84 +3110,93 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -64 ; RV32-NEXT: feq.d a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v12 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB21_2 +; RV32-NEXT: bnez a0, .LBB21_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v12 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB21_4 ; RV32-NEXT: .LBB21_2: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v8, 7 +; RV32-NEXT: j .LBB21_5 +; RV32-NEXT: .LBB21_3: +; RV32-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB21_2 +; RV32-NEXT: .LBB21_4: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 +; RV32-NEXT: .LBB21_5: ; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_4: -; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: fsd ft0, 56(sp) +; RV32-NEXT: bnez a0, .LBB21_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB21_8 +; RV32-NEXT: .LBB21_7: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_6: -; RV32-NEXT: fsd ft1, 48(sp) +; RV32-NEXT: 
.LBB21_8: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 48(sp) +; RV32-NEXT: bnez a0, .LBB21_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB21_11 +; RV32-NEXT: .LBB21_10: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_8: -; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: .LBB21_11: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB21_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB21_14 +; RV32-NEXT: .LBB21_13: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_10: -; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: .LBB21_14: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 32(sp) +; RV32-NEXT: bnez a0, .LBB21_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 -; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: j .LBB21_17 +; RV32-NEXT: .LBB21_16: +; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_12: -; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: .LBB21_17: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB21_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 -; RV32-NEXT: vfmv.f.s ft0, v28 -; RV32-NEXT: vslidedown.vi v28, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v28 -; 
RV32-NEXT: bnez a0, .LBB21_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_14: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: j .LBB21_20 +; RV32-NEXT: .LBB21_19: ; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu -; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: vslidedown.vi v28, v8, 2 +; RV32-NEXT: .LBB21_20: ; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB21_22 +; RV32-NEXT: # %bb.21: +; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu +; RV32-NEXT: vslidedown.vi v28, v12, 1 +; RV32-NEXT: j .LBB21_23 +; RV32-NEXT: .LBB21_22: +; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v28 -; RV32-NEXT: bnez a0, .LBB21_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB21_16: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: .LBB21_23: +; RV32-NEXT: vfmv.f.s ft0, v28 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -128 @@ -2942,84 +3217,93 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: feq.d a0, fa0, fa1 -; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v12 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB21_2 +; RV64-NEXT: bnez a0, .LBB21_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v12 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB21_4 ; RV64-NEXT: .LBB21_2: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v8, 7 +; RV64-NEXT: j .LBB21_5 +; RV64-NEXT: .LBB21_3: +; RV64-NEXT: vsetvli zero, zero, e64,m4,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB21_2 +; RV64-NEXT: .LBB21_4: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 7 +; RV64-NEXT: .LBB21_5: ; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, 
v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_4: -; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: fsd ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB21_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB21_8 +; RV64-NEXT: .LBB21_7: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_6: -; RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: .LBB21_8: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 48(sp) +; RV64-NEXT: bnez a0, .LBB21_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB21_11 +; RV64-NEXT: .LBB21_10: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_8: -; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: .LBB21_11: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB21_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB21_14 +; RV64-NEXT: .LBB21_13: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_10: -; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: .LBB21_14: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 32(sp) +; RV64-NEXT: bnez a0, .LBB21_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: 
vslidedown.vi v28, v12, 3 -; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: j .LBB21_17 +; RV64-NEXT: .LBB21_16: +; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_12: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: .LBB21_17: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB21_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 -; RV64-NEXT: vfmv.f.s ft0, v28 -; RV64-NEXT: vslidedown.vi v28, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_14: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB21_20 +; RV64-NEXT: .LBB21_19: ; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu -; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: vslidedown.vi v28, v8, 2 +; RV64-NEXT: .LBB21_20: ; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB21_22 +; RV64-NEXT: # %bb.21: +; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu +; RV64-NEXT: vslidedown.vi v28, v12, 1 +; RV64-NEXT: j .LBB21_23 +; RV64-NEXT: .LBB21_22: +; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v28 -; RV64-NEXT: bnez a0, .LBB21_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB21_16: -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: .LBB21_23: +; RV64-NEXT: vfmv.f.s ft0, v28 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: vsetivli a0, 8, e64,m4,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -128 @@ -3044,164 +3328,181 @@ ; RV32-NEXT: addi s0, sp, 256 ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -128 -; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v16 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB22_2 +; RV32-NEXT: 
bnez a0, .LBB22_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v16 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB22_4 ; RV32-NEXT: .LBB22_2: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 15 +; RV32-NEXT: j .LBB22_5 +; RV32-NEXT: .LBB22_3: +; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB22_2 +; RV32-NEXT: .LBB22_4: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 15 +; RV32-NEXT: .LBB22_5: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_4: -; RV32-NEXT: fsd ft1, 120(sp) +; RV32-NEXT: fsd ft0, 120(sp) +; RV32-NEXT: bnez a0, .LBB22_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 14 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_6: -; RV32-NEXT: fsd ft1, 112(sp) +; RV32-NEXT: j .LBB22_8 +; RV32-NEXT: .LBB22_7: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 13 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: vslidedown.vi v24, v8, 14 ; RV32-NEXT: .LBB22_8: -; RV32-NEXT: fsd ft1, 104(sp) -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 12 ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 12 -; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: fsd ft0, 112(sp) ; RV32-NEXT: bnez a0, .LBB22_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.d ft1, ft0 
+; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 13 +; RV32-NEXT: j .LBB22_11 ; RV32-NEXT: .LBB22_10: -; RV32-NEXT: fsd ft1, 96(sp) ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 11 +; RV32-NEXT: vslidedown.vi v24, v8, 13 +; RV32-NEXT: .LBB22_11: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_12: -; RV32-NEXT: fsd ft1, 88(sp) +; RV32-NEXT: fsd ft0, 104(sp) +; RV32-NEXT: bnez a0, .LBB22_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 10 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_14: -; RV32-NEXT: fsd ft1, 80(sp) +; RV32-NEXT: vslidedown.vi v24, v16, 12 +; RV32-NEXT: j .LBB22_14 +; RV32-NEXT: .LBB22_13: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 9 +; RV32-NEXT: vslidedown.vi v24, v8, 12 +; RV32-NEXT: .LBB22_14: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: fsd ft0, 96(sp) ; RV32-NEXT: bnez a0, .LBB22_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 11 +; RV32-NEXT: j .LBB22_17 ; RV32-NEXT: .LBB22_16: -; RV32-NEXT: fsd ft1, 72(sp) ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 8 +; RV32-NEXT: vslidedown.vi v24, v8, 11 +; RV32-NEXT: .LBB22_17: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_18: -; RV32-NEXT: fsd ft1, 64(sp) +; RV32-NEXT: fsd 
ft0, 88(sp) +; RV32-NEXT: bnez a0, .LBB22_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 7 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_20: -; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: vslidedown.vi v24, v16, 10 +; RV32-NEXT: j .LBB22_20 +; RV32-NEXT: .LBB22_19: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 6 +; RV32-NEXT: vslidedown.vi v24, v8, 10 +; RV32-NEXT: .LBB22_20: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: fsd ft0, 80(sp) ; RV32-NEXT: bnez a0, .LBB22_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 9 +; RV32-NEXT: j .LBB22_23 ; RV32-NEXT: .LBB22_22: -; RV32-NEXT: fsd ft1, 48(sp) ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 5 +; RV32-NEXT: vslidedown.vi v24, v8, 9 +; RV32-NEXT: .LBB22_23: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_24: -; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: fsd ft0, 72(sp) +; RV32-NEXT: bnez a0, .LBB22_25 +; RV32-NEXT: # %bb.24: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 4 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_26: -; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: vslidedown.vi v24, v16, 8 +; RV32-NEXT: j .LBB22_26 +; RV32-NEXT: .LBB22_25: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 3 +; 
RV32-NEXT: vslidedown.vi v24, v8, 8 +; RV32-NEXT: .LBB22_26: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v24 +; RV32-NEXT: fsd ft0, 64(sp) ; RV32-NEXT: bnez a0, .LBB22_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 7 +; RV32-NEXT: j .LBB22_29 ; RV32-NEXT: .LBB22_28: -; RV32-NEXT: fsd ft1, 24(sp) ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v24, v16, 2 +; RV32-NEXT: vslidedown.vi v24, v8, 7 +; RV32-NEXT: .LBB22_29: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB22_30 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB22_30: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: fsd ft0, 56(sp) +; RV32-NEXT: bnez a0, .LBB22_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v16, v16, 1 -; RV32-NEXT: vfmv.f.s ft0, v16 -; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v8 -; RV32-NEXT: bnez a0, .LBB22_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.d ft1, ft0 +; RV32-NEXT: vslidedown.vi v24, v16, 6 +; RV32-NEXT: j .LBB22_32 +; RV32-NEXT: .LBB22_31: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 6 ; RV32-NEXT: .LBB22_32: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 48(sp) +; RV32-NEXT: bnez a0, .LBB22_34 +; RV32-NEXT: # %bb.33: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 5 +; RV32-NEXT: j .LBB22_35 +; RV32-NEXT: .LBB22_34: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 5 +; RV32-NEXT: .LBB22_35: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB22_37 +; RV32-NEXT: # %bb.36: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 4 +; RV32-NEXT: j .LBB22_38 
+; RV32-NEXT: .LBB22_37: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 4 +; RV32-NEXT: .LBB22_38: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 32(sp) +; RV32-NEXT: bnez a0, .LBB22_40 +; RV32-NEXT: # %bb.39: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 3 +; RV32-NEXT: j .LBB22_41 +; RV32-NEXT: .LBB22_40: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 3 +; RV32-NEXT: .LBB22_41: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB22_43 +; RV32-NEXT: # %bb.42: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v16, 2 +; RV32-NEXT: j .LBB22_44 +; RV32-NEXT: .LBB22_43: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 2 +; RV32-NEXT: .LBB22_44: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB22_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v16, 1 +; RV32-NEXT: j .LBB22_47 +; RV32-NEXT: .LBB22_46: +; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: .LBB22_47: +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -256 @@ -3221,164 +3522,181 @@ ; RV64-NEXT: addi s0, sp, 256 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -128 -; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v16 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB22_2 +; RV64-NEXT: bnez a0, .LBB22_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v16 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB22_4 ; RV64-NEXT: .LBB22_2: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v8, 15 +; RV64-NEXT: j .LBB22_5 +; 
RV64-NEXT: .LBB22_3: +; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB22_2 +; RV64-NEXT: .LBB22_4: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 15 +; RV64-NEXT: .LBB22_5: ; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: vslidedown.vi v24, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_4: -; RV64-NEXT: fsd ft1, 120(sp) +; RV64-NEXT: fsd ft0, 120(sp) +; RV64-NEXT: bnez a0, .LBB22_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 14 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_8 +; RV64-NEXT: .LBB22_7: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_6: -; RV64-NEXT: fsd ft1, 112(sp) +; RV64-NEXT: .LBB22_8: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 112(sp) +; RV64-NEXT: bnez a0, .LBB22_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 13 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_11 +; RV64-NEXT: .LBB22_10: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 13 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_8 -; RV64-NEXT: # %bb.7: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_8: -; RV64-NEXT: fsd ft1, 104(sp) +; RV64-NEXT: .LBB22_11: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 104(sp) +; RV64-NEXT: bnez a0, .LBB22_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 12 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_14 +; RV64-NEXT: .LBB22_13: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 12 -; RV64-NEXT: 
vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_10: -; RV64-NEXT: fsd ft1, 96(sp) +; RV64-NEXT: .LBB22_14: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 96(sp) +; RV64-NEXT: bnez a0, .LBB22_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 11 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_17 +; RV64-NEXT: .LBB22_16: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_12: -; RV64-NEXT: fsd ft1, 88(sp) +; RV64-NEXT: .LBB22_17: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 88(sp) +; RV64-NEXT: bnez a0, .LBB22_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 10 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_20 +; RV64-NEXT: .LBB22_19: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_14: -; RV64-NEXT: fsd ft1, 80(sp) +; RV64-NEXT: .LBB22_20: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 80(sp) +; RV64-NEXT: bnez a0, .LBB22_22 +; RV64-NEXT: # %bb.21: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 9 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_23 +; RV64-NEXT: .LBB22_22: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_16: -; RV64-NEXT: fsd ft1, 72(sp) +; RV64-NEXT: .LBB22_23: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 72(sp) +; RV64-NEXT: bnez a0, .LBB22_25 +; RV64-NEXT: # %bb.24: ; 
RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 8 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_26 +; RV64-NEXT: .LBB22_25: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_18: -; RV64-NEXT: fsd ft1, 64(sp) +; RV64-NEXT: .LBB22_26: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 64(sp) +; RV64-NEXT: bnez a0, .LBB22_28 +; RV64-NEXT: # %bb.27: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 7 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_29 +; RV64-NEXT: .LBB22_28: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_20: -; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: .LBB22_29: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB22_31 +; RV64-NEXT: # %bb.30: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 6 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_32 +; RV64-NEXT: .LBB22_31: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_22: -; RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: .LBB22_32: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 48(sp) +; RV64-NEXT: bnez a0, .LBB22_34 +; RV64-NEXT: # %bb.33: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 5 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_35 +; RV64-NEXT: .LBB22_34: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_24 -; 
RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_24: -; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: .LBB22_35: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB22_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 4 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_38 +; RV64-NEXT: .LBB22_37: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_26: -; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: .LBB22_38: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 32(sp) +; RV64-NEXT: bnez a0, .LBB22_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 3 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB22_41 +; RV64-NEXT: .LBB22_40: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_28: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: .LBB22_41: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB22_43 +; RV64-NEXT: # %bb.42: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 2 -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: vslidedown.vi v24, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB22_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_30: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB22_44 +; RV64-NEXT: .LBB22_43: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV64-NEXT: vslidedown.vi v16, v16, 1 -; RV64-NEXT: vfmv.f.s ft0, v16 +; RV64-NEXT: vslidedown.vi v24, v8, 2 +; RV64-NEXT: .LBB22_44: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 16(sp) +; 
RV64-NEXT: bnez a0, .LBB22_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v8, v16, 1 +; RV64-NEXT: j .LBB22_47 +; RV64-NEXT: .LBB22_46: +; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v8 -; RV64-NEXT: bnez a0, .LBB22_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB22_32: -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: .LBB22_47: +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -256 @@ -3403,164 +3721,181 @@ ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -128 ; RV32-NEXT: feq.d a0, fa0, fa1 -; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu -; RV32-NEXT: vfmv.f.s ft1, v16 -; RV32-NEXT: vfmv.f.s ft0, v8 -; RV32-NEXT: bnez a0, .LBB23_2 +; RV32-NEXT: bnez a0, .LBB23_3 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: fmv.d ft0, ft1 +; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v16 +; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: beqz a0, .LBB23_4 ; RV32-NEXT: .LBB23_2: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v24, v8, 15 +; RV32-NEXT: j .LBB23_5 +; RV32-NEXT: .LBB23_3: +; RV32-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV32-NEXT: vfmv.f.s ft0, v8 ; RV32-NEXT: fsd ft0, 0(sp) +; RV32-NEXT: bnez a0, .LBB23_2 +; RV32-NEXT: .LBB23_4: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 15 +; RV32-NEXT: .LBB23_5: ; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 15 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_4: -; RV32-NEXT: fsd ft1, 120(sp) +; RV32-NEXT: fsd ft0, 120(sp) +; RV32-NEXT: bnez a0, .LBB23_7 +; RV32-NEXT: # %bb.6: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 14 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j 
.LBB23_8 +; RV32-NEXT: .LBB23_7: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 14 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_6: -; RV32-NEXT: fsd ft1, 112(sp) +; RV32-NEXT: .LBB23_8: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 112(sp) +; RV32-NEXT: bnez a0, .LBB23_10 +; RV32-NEXT: # %bb.9: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 13 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_11 +; RV32-NEXT: .LBB23_10: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 13 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_8 -; RV32-NEXT: # %bb.7: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_8: -; RV32-NEXT: fsd ft1, 104(sp) +; RV32-NEXT: .LBB23_11: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 104(sp) +; RV32-NEXT: bnez a0, .LBB23_13 +; RV32-NEXT: # %bb.12: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 12 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_14 +; RV32-NEXT: .LBB23_13: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 12 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_10 -; RV32-NEXT: # %bb.9: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_10: -; RV32-NEXT: fsd ft1, 96(sp) +; RV32-NEXT: .LBB23_14: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 96(sp) +; RV32-NEXT: bnez a0, .LBB23_16 +; RV32-NEXT: # %bb.15: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 11 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_17 +; RV32-NEXT: .LBB23_16: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 11 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_12 -; RV32-NEXT: # %bb.11: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_12: -; RV32-NEXT: fsd ft1, 88(sp) +; RV32-NEXT: .LBB23_17: 
+; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 88(sp) +; RV32-NEXT: bnez a0, .LBB23_19 +; RV32-NEXT: # %bb.18: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 10 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_20 +; RV32-NEXT: .LBB23_19: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 10 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_14 -; RV32-NEXT: # %bb.13: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_14: -; RV32-NEXT: fsd ft1, 80(sp) +; RV32-NEXT: .LBB23_20: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 80(sp) +; RV32-NEXT: bnez a0, .LBB23_22 +; RV32-NEXT: # %bb.21: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 9 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_23 +; RV32-NEXT: .LBB23_22: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 9 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_16 -; RV32-NEXT: # %bb.15: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_16: -; RV32-NEXT: fsd ft1, 72(sp) +; RV32-NEXT: .LBB23_23: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 72(sp) +; RV32-NEXT: bnez a0, .LBB23_25 +; RV32-NEXT: # %bb.24: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 8 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_26 +; RV32-NEXT: .LBB23_25: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 8 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_18 -; RV32-NEXT: # %bb.17: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_18: -; RV32-NEXT: fsd ft1, 64(sp) +; RV32-NEXT: .LBB23_26: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 64(sp) +; RV32-NEXT: bnez a0, .LBB23_28 +; RV32-NEXT: # %bb.27: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 7 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_29 +; RV32-NEXT: .LBB23_28: +; RV32-NEXT: vsetivli a1, 
1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 7 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_20 -; RV32-NEXT: # %bb.19: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_20: -; RV32-NEXT: fsd ft1, 56(sp) +; RV32-NEXT: .LBB23_29: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 56(sp) +; RV32-NEXT: bnez a0, .LBB23_31 +; RV32-NEXT: # %bb.30: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 6 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_32 +; RV32-NEXT: .LBB23_31: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 6 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_22 -; RV32-NEXT: # %bb.21: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_22: -; RV32-NEXT: fsd ft1, 48(sp) +; RV32-NEXT: .LBB23_32: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 48(sp) +; RV32-NEXT: bnez a0, .LBB23_34 +; RV32-NEXT: # %bb.33: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 5 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_35 +; RV32-NEXT: .LBB23_34: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 5 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_24 -; RV32-NEXT: # %bb.23: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_24: -; RV32-NEXT: fsd ft1, 40(sp) +; RV32-NEXT: .LBB23_35: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 40(sp) +; RV32-NEXT: bnez a0, .LBB23_37 +; RV32-NEXT: # %bb.36: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 4 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_38 +; RV32-NEXT: .LBB23_37: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 4 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_26 -; RV32-NEXT: # %bb.25: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_26: -; RV32-NEXT: fsd ft1, 32(sp) +; RV32-NEXT: .LBB23_38: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 32(sp) 
+; RV32-NEXT: bnez a0, .LBB23_40 +; RV32-NEXT: # %bb.39: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 3 -; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: j .LBB23_41 +; RV32-NEXT: .LBB23_40: +; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 3 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_28 -; RV32-NEXT: # %bb.27: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_28: -; RV32-NEXT: fsd ft1, 24(sp) +; RV32-NEXT: .LBB23_41: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 24(sp) +; RV32-NEXT: bnez a0, .LBB23_43 +; RV32-NEXT: # %bb.42: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 2 -; RV32-NEXT: vfmv.f.s ft0, v24 -; RV32-NEXT: vslidedown.vi v24, v8, 2 -; RV32-NEXT: vfmv.f.s ft1, v24 -; RV32-NEXT: bnez a0, .LBB23_30 -; RV32-NEXT: # %bb.29: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_30: -; RV32-NEXT: fsd ft1, 16(sp) +; RV32-NEXT: j .LBB23_44 +; RV32-NEXT: .LBB23_43: ; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV32-NEXT: vslidedown.vi v16, v16, 1 -; RV32-NEXT: vfmv.f.s ft0, v16 +; RV32-NEXT: vslidedown.vi v24, v8, 2 +; RV32-NEXT: .LBB23_44: +; RV32-NEXT: vfmv.f.s ft0, v24 +; RV32-NEXT: fsd ft0, 16(sp) +; RV32-NEXT: bnez a0, .LBB23_46 +; RV32-NEXT: # %bb.45: +; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu +; RV32-NEXT: vslidedown.vi v8, v16, 1 +; RV32-NEXT: j .LBB23_47 +; RV32-NEXT: .LBB23_46: +; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v8, v8, 1 -; RV32-NEXT: vfmv.f.s ft1, v8 -; RV32-NEXT: bnez a0, .LBB23_32 -; RV32-NEXT: # %bb.31: -; RV32-NEXT: fmv.d ft1, ft0 -; RV32-NEXT: .LBB23_32: -; RV32-NEXT: fsd ft1, 8(sp) +; RV32-NEXT: .LBB23_47: +; RV32-NEXT: vfmv.f.s ft0, v8 +; RV32-NEXT: fsd ft0, 8(sp) ; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV32-NEXT: vle64.v v8, (sp) ; RV32-NEXT: addi sp, s0, -256 @@ -3581,164 +3916,181 @@ ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -128 ; RV64-NEXT: feq.d a0, fa0, fa1 -; RV64-NEXT: 
vsetvli zero, zero, e64,m8,ta,mu -; RV64-NEXT: vfmv.f.s ft1, v16 -; RV64-NEXT: vfmv.f.s ft0, v8 -; RV64-NEXT: bnez a0, .LBB23_2 +; RV64-NEXT: bnez a0, .LBB23_3 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fmv.d ft0, ft1 +; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v16 +; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: beqz a0, .LBB23_4 ; RV64-NEXT: .LBB23_2: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v24, v8, 15 +; RV64-NEXT: j .LBB23_5 +; RV64-NEXT: .LBB23_3: +; RV64-NEXT: vsetvli zero, zero, e64,m8,ta,mu +; RV64-NEXT: vfmv.f.s ft0, v8 ; RV64-NEXT: fsd ft0, 0(sp) +; RV64-NEXT: bnez a0, .LBB23_2 +; RV64-NEXT: .LBB23_4: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 15 +; RV64-NEXT: .LBB23_5: ; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: vslidedown.vi v24, v8, 15 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_4: -; RV64-NEXT: fsd ft1, 120(sp) +; RV64-NEXT: fsd ft0, 120(sp) +; RV64-NEXT: bnez a0, .LBB23_7 +; RV64-NEXT: # %bb.6: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 14 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_8 +; RV64-NEXT: .LBB23_7: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 14 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_6: -; RV64-NEXT: fsd ft1, 112(sp) +; RV64-NEXT: .LBB23_8: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 112(sp) +; RV64-NEXT: bnez a0, .LBB23_10 +; RV64-NEXT: # %bb.9: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 13 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_11 +; RV64-NEXT: .LBB23_10: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 13 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_8 -; RV64-NEXT: # %bb.7: -; 
RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_8: -; RV64-NEXT: fsd ft1, 104(sp) +; RV64-NEXT: .LBB23_11: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 104(sp) +; RV64-NEXT: bnez a0, .LBB23_13 +; RV64-NEXT: # %bb.12: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 12 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_14 +; RV64-NEXT: .LBB23_13: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 12 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_10 -; RV64-NEXT: # %bb.9: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_10: -; RV64-NEXT: fsd ft1, 96(sp) +; RV64-NEXT: .LBB23_14: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 96(sp) +; RV64-NEXT: bnez a0, .LBB23_16 +; RV64-NEXT: # %bb.15: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 11 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_17 +; RV64-NEXT: .LBB23_16: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 11 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_12 -; RV64-NEXT: # %bb.11: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_12: -; RV64-NEXT: fsd ft1, 88(sp) +; RV64-NEXT: .LBB23_17: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 88(sp) +; RV64-NEXT: bnez a0, .LBB23_19 +; RV64-NEXT: # %bb.18: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 10 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_20 +; RV64-NEXT: .LBB23_19: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 10 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_14 -; RV64-NEXT: # %bb.13: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_14: -; RV64-NEXT: fsd ft1, 80(sp) +; RV64-NEXT: .LBB23_20: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 80(sp) +; RV64-NEXT: bnez a0, .LBB23_22 +; RV64-NEXT: # %bb.21: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 9 
-; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_23 +; RV64-NEXT: .LBB23_22: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 9 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_16 -; RV64-NEXT: # %bb.15: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_16: -; RV64-NEXT: fsd ft1, 72(sp) +; RV64-NEXT: .LBB23_23: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 72(sp) +; RV64-NEXT: bnez a0, .LBB23_25 +; RV64-NEXT: # %bb.24: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 8 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_26 +; RV64-NEXT: .LBB23_25: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 8 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_18 -; RV64-NEXT: # %bb.17: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_18: -; RV64-NEXT: fsd ft1, 64(sp) +; RV64-NEXT: .LBB23_26: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 64(sp) +; RV64-NEXT: bnez a0, .LBB23_28 +; RV64-NEXT: # %bb.27: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 7 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_29 +; RV64-NEXT: .LBB23_28: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 7 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_20 -; RV64-NEXT: # %bb.19: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_20: -; RV64-NEXT: fsd ft1, 56(sp) +; RV64-NEXT: .LBB23_29: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 56(sp) +; RV64-NEXT: bnez a0, .LBB23_31 +; RV64-NEXT: # %bb.30: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 6 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_32 +; RV64-NEXT: .LBB23_31: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 6 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_22 -; RV64-NEXT: # %bb.21: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_22: -; 
RV64-NEXT: fsd ft1, 48(sp) +; RV64-NEXT: .LBB23_32: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 48(sp) +; RV64-NEXT: bnez a0, .LBB23_34 +; RV64-NEXT: # %bb.33: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 5 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_35 +; RV64-NEXT: .LBB23_34: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 5 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_24 -; RV64-NEXT: # %bb.23: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_24: -; RV64-NEXT: fsd ft1, 40(sp) +; RV64-NEXT: .LBB23_35: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 40(sp) +; RV64-NEXT: bnez a0, .LBB23_37 +; RV64-NEXT: # %bb.36: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 4 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_38 +; RV64-NEXT: .LBB23_37: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 4 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_26 -; RV64-NEXT: # %bb.25: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_26: -; RV64-NEXT: fsd ft1, 32(sp) +; RV64-NEXT: .LBB23_38: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 32(sp) +; RV64-NEXT: bnez a0, .LBB23_40 +; RV64-NEXT: # %bb.39: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 3 -; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: j .LBB23_41 +; RV64-NEXT: .LBB23_40: +; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 3 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_28 -; RV64-NEXT: # %bb.27: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_28: -; RV64-NEXT: fsd ft1, 24(sp) +; RV64-NEXT: .LBB23_41: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 24(sp) +; RV64-NEXT: bnez a0, .LBB23_43 +; RV64-NEXT: # %bb.42: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 2 -; RV64-NEXT: vfmv.f.s ft0, v24 -; RV64-NEXT: vslidedown.vi 
v24, v8, 2 -; RV64-NEXT: vfmv.f.s ft1, v24 -; RV64-NEXT: bnez a0, .LBB23_30 -; RV64-NEXT: # %bb.29: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_30: -; RV64-NEXT: fsd ft1, 16(sp) +; RV64-NEXT: j .LBB23_44 +; RV64-NEXT: .LBB23_43: ; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu -; RV64-NEXT: vslidedown.vi v16, v16, 1 -; RV64-NEXT: vfmv.f.s ft0, v16 +; RV64-NEXT: vslidedown.vi v24, v8, 2 +; RV64-NEXT: .LBB23_44: +; RV64-NEXT: vfmv.f.s ft0, v24 +; RV64-NEXT: fsd ft0, 16(sp) +; RV64-NEXT: bnez a0, .LBB23_46 +; RV64-NEXT: # %bb.45: +; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu +; RV64-NEXT: vslidedown.vi v8, v16, 1 +; RV64-NEXT: j .LBB23_47 +; RV64-NEXT: .LBB23_46: +; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v8, 1 -; RV64-NEXT: vfmv.f.s ft1, v8 -; RV64-NEXT: bnez a0, .LBB23_32 -; RV64-NEXT: # %bb.31: -; RV64-NEXT: fmv.d ft1, ft0 -; RV64-NEXT: .LBB23_32: -; RV64-NEXT: fsd ft1, 8(sp) +; RV64-NEXT: .LBB23_47: +; RV64-NEXT: vfmv.f.s ft0, v8 +; RV64-NEXT: fsd ft0, 8(sp) ; RV64-NEXT: vsetivli a0, 16, e64,m8,ta,mu ; RV64-NEXT: vle64.v v8, (sp) ; RV64-NEXT: addi sp, s0, -256 diff --git a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll --- a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll @@ -15,8 +15,7 @@ ; CHECK: bb.0.entry: ; CHECK: liveins: $v8 ; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v8 - ; CHECK: dead %2:gpr = PseudoVSETIVLI 1, 88, implicit-def $vl, implicit-def $vtype - ; CHECK: PseudoVSE64_V_M1 [[COPY]], %stack.0.a, 1, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoVSE64_V_M1 [[COPY]], %stack.0.a, 1, 6 ; CHECK: [[LD:%[0-9]+]]:gpr = LD %stack.0.a, 0 :: (dereferenceable load 8 from %ir.a) ; CHECK: $x10 = COPY [[LD]] ; CHECK: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir --- a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir +++ 
b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir @@ -15,20 +15,21 @@ liveins: $v0, $v1, $v2, $v3 ; CHECK-LABEL: name: mask_reg_alloc ; CHECK: liveins: $v0, $v1, $v2, $v3 - ; CHECK: renamable $v25 = PseudoVMERGE_VIM_M1 killed renamable $v2, 1, killed renamable $v0, $noreg, -1, implicit $vl, implicit $vtype + ; CHECK: dead renamable $x10 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype + ; CHECK: renamable $v25 = PseudoVMERGE_VIM_M1 killed renamable $v2, 1, killed renamable $v0, 1, 3, implicit $vl, implicit $vtype ; CHECK: renamable $v0 = COPY killed renamable $v1 - ; CHECK: renamable $v26 = PseudoVMERGE_VIM_M1 killed renamable $v3, 1, killed renamable $v0, $noreg, -1, implicit $vl, implicit $vtype - ; CHECK: renamable $v0 = PseudoVADD_VV_M1 killed renamable $v25, killed renamable $v26, $noreg, -1, implicit $vl, implicit $vtype + ; CHECK: renamable $v26 = PseudoVMERGE_VIM_M1 killed renamable $v3, 1, killed renamable $v0, 1, 3, implicit $vl, implicit $vtype + ; CHECK: renamable $v0 = PseudoVADD_VV_M1 killed renamable $v25, killed renamable $v26, 1, 3, implicit $vl, implicit $vtype ; CHECK: PseudoRET implicit $v0 %0:vr = COPY $v0 %1:vr = COPY $v1 %2:vr = COPY $v2 %3:vr = COPY $v3 %4:vmv0 = COPY %0 - %5:vrnov0 = PseudoVMERGE_VIM_M1 killed %2, 1, %4, $noreg, -1, implicit $vl, implicit $vtype + %5:vrnov0 = PseudoVMERGE_VIM_M1 killed %2, 1, %4, 1, 3 %6:vmv0 = COPY %1 - %7:vrnov0 = PseudoVMERGE_VIM_M1 killed %3, 1, %6, $noreg, -1, implicit $vl, implicit $vtype - %8:vr = PseudoVADD_VV_M1 killed %5, killed %7, $noreg, -1, implicit $vl, implicit $vtype + %7:vrnov0 = PseudoVMERGE_VIM_M1 killed %3, 1, %6, 1, 3 + %8:vr = PseudoVADD_VV_M1 killed %5, killed %7, 1, 3 $v0 = COPY %8 PseudoRET implicit $v0 ... 
diff --git a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir --- a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir +++ b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir @@ -52,8 +52,7 @@ ; CHECK: $v0 = COPY [[COPY]] ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK: [[COPY2:%[0-9]+]]:vrm8nov0 = COPY [[DEF]] - ; CHECK: dead %5:gpr = PseudoVSETVLI $x0, 91, implicit-def $vl, implicit-def $vtype - ; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $noreg, 6, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8) + ; CHECK: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK [[COPY2]], [[COPY1]], $v0, $x0, 6 :: (load 64 from %ir.a, align 8) ; CHECK: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]] ; CHECK: PseudoRET implicit $v8m8 %1:vr = COPY $v0 @@ -61,7 +60,7 @@ $v0 = COPY %1 %3:vrm8 = IMPLICIT_DEF %4:vrm8nov0 = COPY %3 - %2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 6, implicit $vl, implicit $vtype :: (load 64 from %ir.a, align 8) + %2:vrm8nov0 = PseudoVLE64_V_M8_MASK %4, %0, $v0, $x0, 6 :: (load 64 from %ir.a, align 8) $v8m8 = COPY %2 PseudoRET implicit $v8m8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir @@ -0,0 +1,354 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \ +# RUN: -run-pass=riscv-insert-vsetvli | FileCheck %s + +--- | + ; ModuleID = 'test.ll' + source_filename = "test.ll" + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" + target triple = "riscv64" + + ; Function Attrs: nounwind + define @add( %0, %1, i64 %2) #0 { + entry: + %a = call @llvm.riscv.vadd.nxv1i64.nxv1i64.i64( %0, %1, i64 %2) + ret %a + } + + ; Function Attrs: nounwind + 
define @load_add(* %0, %1, i64 %2) #0 { + entry: + %a = call @llvm.riscv.vle.nxv1i64.i64(* %0, i64 %2) + %b = call @llvm.riscv.vadd.nxv1i64.nxv1i64.i64( %a, %1, i64 %2) + ret %b + } + + ; Function Attrs: nounwind + define @load_zext(* %0, i64 %1) #0 { + entry: + %a = call @llvm.riscv.vle.nxv1i32.i64(* %0, i64 %1) + %b = call @llvm.riscv.vzext.nxv1i64.nxv1i32.i64( %a, i64 %1) + ret %b + } + + ; Function Attrs: nounwind readnone + declare i64 @llvm.riscv.vmv.x.s.nxv1i64() #1 + + ; Function Attrs: nounwind + define i64 @vmv_x_s( %0) #0 { + entry: + %a = call i64 @llvm.riscv.vmv.x.s.nxv1i64( %0) + ret i64 %a + } + + define void @add_v2i64(<2 x i64>* %x, <2 x i64>* %y) #2 { + %a = load <2 x i64>, <2 x i64>* %x, align 16 + %b = load <2 x i64>, <2 x i64>* %y, align 16 + %c = add <2 x i64> %a, %b + store <2 x i64> %c, <2 x i64>* %x, align 16 + ret void + } + + ; Function Attrs: nofree nosync nounwind readnone willreturn + declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) #3 + + define i64 @vreduce_add_v2i64(<2 x i64>* %x) #2 { + %v = load <2 x i64>, <2 x i64>* %x, align 16 + %red = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %v) + ret i64 %red + } + + ; Function Attrs: nounwind + declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #0 + + ; Function Attrs: nounwind + define @vsetvli_add( %0, %1, i64 %avl) #0 { + entry: + %a = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 1) + %b = call @llvm.riscv.vadd.nxv1i64.nxv1i64.i64( %0, %1, i64 %a) + ret %b + } + + ; Function Attrs: nounwind readnone + declare @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(, , i64) #1 + + ; Function Attrs: nounwind readonly + declare @llvm.riscv.vle.nxv1i64.i64(* nocapture, i64) #2 + + ; Function Attrs: nounwind readonly + declare @llvm.riscv.vle.nxv1i32.i64(* nocapture, i64) #2 + + ; Function Attrs: nounwind readnone + declare @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(, i64) #1 + + attributes #0 = { nounwind "target-features"="+experimental-v" } + attributes #1 = { nounwind readnone 
"target-features"="+experimental-v" } + attributes #2 = { "target-features"="+experimental-v" } + attributes #3 = { nofree nosync nounwind readnone willreturn "target-features"="+experimental-v" } + attributes #4 = { nounwind readonly "target-features"="+experimental-v" } + +... +--- +name: add +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: gpr } + - { id: 3, class: vr } +liveins: + - { reg: '$v8', virtual-reg: '%0' } + - { reg: '$v9', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8, $v9, $x10 + + ; CHECK-LABEL: name: add + ; CHECK: liveins: $v8, $v9, $x10 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9 + ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8 + ; CHECK: dead %4:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]] + ; CHECK: PseudoRET implicit $v8 + %2:gpr = COPY $x10 + %1:vr = COPY $v9 + %0:vr = COPY $v8 + %3:vr = PseudoVADD_VV_M1 %0, %1, %2, 6 + $v8 = COPY %3 + PseudoRET implicit $v8 + +... 
+--- +name: load_add +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: vr } + - { id: 2, class: gpr } + - { id: 3, class: vr } + - { id: 4, class: vr } +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$v8', virtual-reg: '%1' } + - { reg: '$x11', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10, $v8, $x11 + + ; CHECK-LABEL: name: load_add + ; CHECK: liveins: $x10, $v8, $x11 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v8 + ; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %5:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]] + ; CHECK: PseudoRET implicit $v8 + %2:gpr = COPY $x11 + %1:vr = COPY $v8 + %0:gpr = COPY $x10 + %3:vr = PseudoVLE64_V_M1 %0, %2, 6 + %4:vr = PseudoVADD_VV_M1 killed %3, %1, %2, 6 + $v8 = COPY %4 + PseudoRET implicit $v8 + +... 
+--- +name: load_zext +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: vr } + - { id: 3, class: vr } +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$x11', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: load_zext + ; CHECK: liveins: $x10, $x11 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %4:gpr = PseudoVSETVLI [[COPY]], 87, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 [[COPY1]], $noreg, 5, implicit $vl, implicit $vtype + ; CHECK: dead %5:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: early-clobber %3:vr = PseudoVZEXT_VF2_M1 killed [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: $v8 = COPY %3 + ; CHECK: PseudoRET implicit $v8 + %1:gpr = COPY $x11 + %0:gpr = COPY $x10 + %2:vr = PseudoVLE32_V_MF2 %0, %1, 5 + early-clobber %3:vr = PseudoVZEXT_VF2_M1 killed %2, %1, 6 + $v8 = COPY %3 + PseudoRET implicit $v8 + +... +--- +name: vmv_x_s +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: gpr } +liveins: + - { reg: '$v8', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8 + + ; CHECK-LABEL: name: vmv_x_s + ; CHECK: liveins: $v8 + ; CHECK: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; CHECK: dead $x0 = PseudoVSETVLI killed $x0, 88, implicit-def $vl, implicit-def $vtype, implicit $vl + ; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 [[COPY]], 6, implicit $vtype + ; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]] + ; CHECK: PseudoRET implicit $x10 + %0:vr = COPY $v8 + %1:gpr = PseudoVMV_X_S_M1 %0, 6 + $x10 = COPY %1 + PseudoRET implicit $x10 + +... 
+--- +name: add_v2i64 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: vr } + - { id: 3, class: vr } + - { id: 4, class: vr } +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$x11', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $x10, $x11 + + ; CHECK-LABEL: name: add_v2i64 + ; CHECK: liveins: $x10, $x11 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %5:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.x) + ; CHECK: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.y) + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], killed [[PseudoVLE64_V_M1_1]], 2, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoVSE64_V_M1 killed [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6, implicit $vl, implicit $vtype :: (store 16 into %ir.x) + ; CHECK: PseudoRET + %1:gpr = COPY $x11 + %0:gpr = COPY $x10 + %2:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load 16 from %ir.x) + %3:vr = PseudoVLE64_V_M1 %1, 2, 6 :: (load 16 from %ir.y) + %4:vr = PseudoVADD_VV_M1 killed %2, killed %3, 2, 6 + PseudoVSE64_V_M1 killed %4, %0, 2, 6 :: (store 16 into %ir.x) + PseudoRET + +... 
+--- +name: vreduce_add_v2i64 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: vr } + - { id: 2, class: vr } + - { id: 3, class: vr } + - { id: 4, class: vr } + - { id: 5, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $x10 + + ; CHECK-LABEL: name: vreduce_add_v2i64 + ; CHECK: liveins: $x10 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %6:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6, implicit $vl, implicit $vtype :: (load 16 from %ir.x) + ; CHECK: dead %7:gpr = PseudoVSETVLI $x0, 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 0, $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; CHECK: dead %8:gpr = PseudoVSETIVLI 2, 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVREDSUM_VS_M1_:%[0-9]+]]:vr = PseudoVREDSUM_VS_M1 [[DEF]], killed [[PseudoVLE64_V_M1_]], killed [[PseudoVMV_V_I_M1_]], 2, 6, implicit $vl, implicit $vtype + ; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 killed [[PseudoVREDSUM_VS_M1_]], 6, implicit $vtype + ; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]] + ; CHECK: PseudoRET implicit $x10 + %0:gpr = COPY $x10 + %1:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load 16 from %ir.x) + %2:vr = PseudoVMV_V_I_M1 0, $x0, 6 + %4:vr = IMPLICIT_DEF + %3:vr = PseudoVREDSUM_VS_M1 %4, killed %1, killed %2, 2, 6 + %5:gpr = PseudoVMV_X_S_M1 killed %3, 6 + $x10 = COPY %5 + PseudoRET implicit $x10 + +... 
+--- +name: vsetvli_add +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: vr } +liveins: + - { reg: '$v8', virtual-reg: '%0' } + - { reg: '$v9', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8, $v9, $x10 + + ; CHECK-LABEL: name: vsetvli_add + ; CHECK: liveins: $v8, $v9, $x10 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9 + ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8 + ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def dead $vl, implicit-def dead $vtype + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: $v8 = COPY [[PseudoVADD_VV_M1_]] + ; CHECK: PseudoRET implicit $v8 + %2:gpr = COPY $x10 + %1:vr = COPY $v9 + %0:vr = COPY $v8 + %3:gpr = PseudoVSETVLI %2, 88, implicit-def dead $vl, implicit-def dead $vtype + %4:vr = PseudoVADD_VV_M1 %0, %1, killed %3, 6 + $v8 = COPY %4 + PseudoRET implicit $v8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=riscv64 -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s +# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s --- | target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" @@ -41,7 +41,7 @@ ; CHECK: PseudoRET %0:gpr = COPY $x10 %1:gpr = COPY $x11 - $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 %0, %1, 6, implicit $vl, implicit $vtype + $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 %0, %1, 6 PseudoVSPILL7_M1 killed renamable $v0_v1_v2_v3_v4_v5_v6, %stack.0, $x0 renamable $v7_v8_v9_v10_v11_v12_v13 = PseudoVRELOAD7_M1 %stack.0, $x0 VS1R_V killed $v8, %0:gpr