diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -9,13 +9,17 @@
 // This file implements a function pass that inserts VSETVLI instructions where
 // needed.
 //
-// The pass consists of a single pass over each basic block looking for changes
-// in VL/VTYPE usage that requires a vsetvli to be inserted. We assume the
-// VL/VTYPE values are unknown from predecessors so the first vector instruction
-// will always require a new VSETVLI.
+// This pass consists of 3 phases:
 //
-// TODO: Future enhancements to this pass will take into account VL/VTYPE from
-// predecessors.
+// Phase 1 collects how each basic block affects VL/VTYPE.
+//
+// Phase 2 uses the information from phase 1 to do a data flow analysis to
+// propagate the VL/VTYPE changes through the function. This gives us the
+// VL/VTYPE at the start of each basic block.
+//
+// Phase 3 inserts VSETVLI instructions in each basic block. Information from
+// phase 2 is used to prevent inserting a VSETVLI before the first vector
+// instruction in the block if possible.
 //
 //===----------------------------------------------------------------------===//
@@ -23,6 +27,7 @@
 #include "RISCVSubtarget.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include <queue>
 using namespace llvm;
 
 #define DEBUG_TYPE "riscv-insert-vsetvli"
@@ -52,6 +57,12 @@
 public:
   VSETVLIInfo() : AVLImm(0) {}
 
+  static VSETVLIInfo getUnknown() {
+    VSETVLIInfo Info;
+    Info.setUnknown();
+    return Info;
+  }
+
   bool isValid() const { return State != Uninitialized; }
   void setUnknown() { State = Unknown; }
   bool isUnknown() const { return State == Unknown; }
@@ -148,12 +159,89 @@
     return getAVLReg() == Other.getAVLReg();
   }
 
+  bool operator==(const VSETVLIInfo &Other) const {
+    // Uninitialized is only equal to another Uninitialized.
+    if (!isValid())
+      return !Other.isValid();
+    if (!Other.isValid())
+      return !isValid();
+
+    // Unknown is only equal to another Unknown.
+    if (isUnknown())
+      return Other.isUnknown();
+    if (Other.isUnknown())
+      return isUnknown();
+
+    // Otherwise compare the VTYPE and AVL.
+    return hasSameVTYPE(Other) && hasSameAVL(Other);
+  }
+
+  bool operator!=(const VSETVLIInfo &Other) const { return !(*this == Other); }
+
+  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
+  // both predecessors.
+  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
+    // If the new value isn't valid, ignore it.
+    if (!Other.isValid())
+      return *this;
+
+    // If this value isn't valid, this must be the first predecessor, use it.
+    if (!isValid())
+      return Other;
+
+    if (*this == Other)
+      return *this;
+
+    // If the configurations don't match, assume unknown.
+    return VSETVLIInfo::getUnknown();
+  }
+
+  // Calculate the VSETVLIInfo visible at the end of the block assuming this
+  // is the predecessor value, and Other is the change for this block.
+  VSETVLIInfo merge(const VSETVLIInfo &Other) const {
+    assert(isValid() && "Can only merge with a valid VSETVLIInfo");
+
+    // Nothing changed from the predecessor, keep it.
+    if (!Other.isValid())
+      return *this;
+
+    // If the change is compatible with the input, we won't create a VSETVLI
+    // and should keep the predecessor.
+    if (isCompatible(Other))
+      return *this;
+
+    // Otherwise just use whatever is in this block.
+    return Other;
+  }
+};
+
+struct BlockData {
+  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
+  // made by this block. Calculated in Phase 1.
+  VSETVLIInfo Change;
+
+  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
+  // block. Calculated in Phase 2.
+  VSETVLIInfo Exit;
+
+  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
+  // blocks. Calculated in Phase 2, and used by Phase 3.
+  VSETVLIInfo Pred;
+
+  // Keeps track of whether the block is already in the queue.
+  bool InQueue = false;
+
+  BlockData() {}
 };
 
 class RISCVInsertVSETVLI : public MachineFunctionPass {
   const TargetInstrInfo *TII;
   MachineRegisterInfo *MRI;
 
+  std::vector<BlockData> BlockInfo;
+  std::queue<const MachineBasicBlock *> WorkList;
+
 public:
   static char ID;
@@ -170,10 +258,13 @@
   StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }
 
 private:
+  bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo);
   void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                      const VSETVLIInfo &Info);
 
-  bool emitVSETVLIs(MachineBasicBlock &MBB);
+  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
+  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
+  void emitVSETVLIs(MachineBasicBlock &MBB);
 };
 
 } // end anonymous namespace
@@ -276,7 +367,7 @@
 
 // Return a VSETVLIInfo representing the changes made by this VSETVLI or
 // VSETIVLI instruction.
-VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
+static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
   VSETVLIInfo NewInfo;
   if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
     Register AVLReg = MI.getOperand(1).getReg();
@@ -292,12 +383,111 @@
   return NewInfo;
 }
 
-bool RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
-  bool MadeChange = false;
+bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
+                                     const VSETVLIInfo &CurInfo) {
+  if (CurInfo.isCompatible(Require))
+    return false;
+
+  // We didn't find a compatible value. If our AVL is a virtual register,
+  // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
+  // and the last VL/VTYPE we observed is the same, we don't need a
+  // VSETVLI here.
+  if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
+      Require.getAVLReg().isVirtual() && Require.hasSameVTYPE(CurInfo)) {
+    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
+      if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
+          DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
+        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
+          return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
+  bool HadVectorOp = false;
+
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+  for (const MachineInstr &MI : MBB) {
+    // If this is an explicit VSETVLI or VSETIVLI, update our state.
+    if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+        MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+      HadVectorOp = true;
+      BBInfo.Change = getInfoForVSETVLI(MI);
+      continue;
+    }
+
+    uint64_t TSFlags = MI.getDesc().TSFlags;
+    if (RISCVII::hasSEWOp(TSFlags)) {
+      HadVectorOp = true;
+
+      VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
+
+      if (!BBInfo.Change.isValid()) {
+        BBInfo.Change = NewInfo;
+      } else {
+        // If this instruction isn't compatible with the previous VL/VTYPE
+        // we need to insert a VSETVLI.
+        if (needVSETVLI(NewInfo, BBInfo.Change))
+          BBInfo.Change = NewInfo;
+      }
+    }
+
+    // If this is something that updates VL/VTYPE that we don't know about, set
+    // the state to unknown.
+    if (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
+        MI.modifiesRegister(RISCV::VTYPE)) {
+      BBInfo.Change = VSETVLIInfo::getUnknown();
+    }
+  }
+
+  // Initial exit state is whatever change we found in the block.
+  BBInfo.Exit = BBInfo.Change;
+
+  return HadVectorOp;
+}
+
+void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
+  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
+
+  BBInfo.InQueue = false;
+
+  VSETVLIInfo InInfo;
+  if (MBB.pred_empty()) {
+    // There are no predecessors, so use the default starting status.
+    InInfo.setUnknown();
+  } else {
+    for (MachineBasicBlock *P : MBB.predecessors())
+      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
+  }
+
+  // If we don't have any valid predecessor value, wait until we do.
+  if (!InInfo.isValid())
+    return;
 
-  // Assume predecessor state is unknown.
+  BBInfo.Pred = InInfo;
+
+  VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change);
+
+  // If the new exit value matches the old exit value, we don't need to revisit
+  // any blocks.
+  if (BBInfo.Exit == TmpStatus)
+    return;
+
+  BBInfo.Exit = TmpStatus;
+
+  // Add the successors to the work list so we can propagate the changed exit
+  // status.
+  for (MachineBasicBlock *S : MBB.successors())
+    if (!BlockInfo[S->getNumber()].InQueue)
+      WorkList.push(S);
+}
+
+void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
   VSETVLIInfo CurInfo;
-  CurInfo.setUnknown();
 
   for (MachineInstr &MI : MBB) {
     // If this is an explicit VSETVLI or VSETIVLI, update our state.
@@ -309,7 +499,6 @@
              "Unexpected operands where VL and VTYPE should be");
       MI.getOperand(3).setIsDead(false);
       MI.getOperand(4).setIsDead(false);
-      MadeChange = true;
      CurInfo = getInfoForVSETVLI(MI);
      continue;
    }
@@ -330,47 +519,32 @@
         MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                                 /*isImp*/ true));
 
-      bool NeedVSETVLI = true;
-      if (CurInfo.isValid() && CurInfo.isCompatible(NewInfo))
-        NeedVSETVLI = false;
-
-      // We didn't find a compatible value. If our AVL is a virtual register,
-      // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
-      // and the last VL/VTYPE we observed is the same, we don't need a
-      // VSETVLI here.
-      if (NeedVSETVLI && !CurInfo.isUnknown() && NewInfo.hasAVLReg() &&
-          NewInfo.getAVLReg().isVirtual() && NewInfo.hasSameVTYPE(CurInfo)) {
-        if (MachineInstr *DefMI = MRI->getVRegDef(NewInfo.getAVLReg())) {
-          if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
-              DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
-            VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
-            if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
-              NeedVSETVLI = false;
-          }
+      if (!CurInfo.isValid()) {
+        // We haven't found any vector instructions or VL/VTYPE changes yet,
+        // use the predecessor information.
+        assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
+               "Expected a valid predecessor state.");
+        if (needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred)) {
+          insertVSETVLI(MBB, MI, NewInfo);
+          CurInfo = NewInfo;
+        }
+      } else {
+        // If this instruction isn't compatible with the previous VL/VTYPE
+        // we need to insert a VSETVLI.
+        if (needVSETVLI(NewInfo, CurInfo)) {
+          insertVSETVLI(MBB, MI, NewInfo);
+          CurInfo = NewInfo;
        }
      }
-
-      // If this instruction isn't compatible with the previous VL/VTYPE
-      // we need to insert a VSETVLI.
-      if (NeedVSETVLI) {
-        insertVSETVLI(MBB, MI, NewInfo);
-        CurInfo = NewInfo;
-      }
-
-      // If we find an instruction we at least changed the operands.
-      MadeChange = true;
    }
+
    // If this is something updates VL/VTYPE that we don't know about, set
    // the state to unknown.
    if (MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE)) {
-      VSETVLIInfo NewInfo;
-      NewInfo.setUnknown();
-      CurInfo = NewInfo;
+      CurInfo = VSETVLIInfo::getUnknown();
    }
  }
-
-  return MadeChange;
 }
 
 bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
@@ -382,12 +556,41 @@
   TII = ST.getInstrInfo();
   MRI = &MF.getRegInfo();
 
-  bool Changed = false;
+  assert(BlockInfo.empty() && "Expect empty block infos");
+  BlockInfo.resize(MF.getNumBlockIDs());
+
+  bool HaveVectorOp = false;
+
+  // Phase 1 - determine how VL/VTYPE are affected by each block.
+  for (const MachineBasicBlock &MBB : MF)
+    HaveVectorOp |= computeVLVTYPEChanges(MBB);
+
+  // If we didn't find any instructions that need VSETVLI, we're done.
+  if (HaveVectorOp) {
+    // Phase 2 - determine the exit VL/VTYPE from each block. We add all
+    // blocks to the list here, but will also add any that need to be revisited
+    // during Phase 2 processing.
+    for (const MachineBasicBlock &MBB : MF) {
+      WorkList.push(&MBB);
+      BlockInfo[MBB.getNumber()].InQueue = true;
+    }
+    while (!WorkList.empty()) {
+      const MachineBasicBlock &MBB = *WorkList.front();
+      WorkList.pop();
+      computeIncomingVLVTYPE(MBB);
+    }
+
+    // Phase 3 - add any vsetvli instructions needed in the block. Use the
+    // Phase 2 information to avoid adding vsetvlis before the first vector
+    // instruction in the block if the VL/VTYPE is satisfied by its
+    // predecessors.
+    for (MachineBasicBlock &MBB : MF)
+      emitVSETVLIs(MBB);
+  }
 
-  for (MachineBasicBlock &MBB : MF)
-    Changed |= emitVSETVLIs(MBB);
+  BlockInfo.clear();
 
-  return Changed;
+  return HaveVectorOp;
 }
 
 /// Returns an instance of the Insert VSETVLI pass.
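The machinery added above is a standard forward worklist data-flow computation. As an illustration only (not part of the patch), the standalone sketch below models Phase 2 with a toy three-state lattice and integer block indices standing in for VSETVLIInfo and MachineBasicBlock; its merge step also omits the isCompatible check the real pass performs. It shows how the worklist converges on the VL/VTYPE state visible at each block entry, which is what lets Phase 3 skip a vsetvli before the first vector instruction when every predecessor already agrees.

// Toy model of Phase 2 only: "State" stands in for VSETVLIInfo, integer
// indices stand in for MachineBasicBlock numbers. Not the pass's real types.
#include <cstdio>
#include <queue>
#include <vector>

enum class Kind { Uninitialized, Known, Unknown };

struct State {
  Kind K;
  int Config; // stand-in for a concrete VL/VTYPE configuration
  State(Kind K = Kind::Uninitialized, int Config = 0) : K(K), Config(Config) {}

  bool operator==(const State &O) const {
    return K == O.K && (K != Kind::Known || Config == O.Config);
  }
  // Meet of two predecessor exit states (analogue of VSETVLIInfo::intersect).
  State intersect(const State &O) const {
    if (O.K == Kind::Uninitialized)
      return *this;
    if (K == Kind::Uninitialized)
      return O;
    return *this == O ? *this : State(Kind::Unknown);
  }
  // Apply a block's net change to this incoming state (simplified merge: the
  // real pass also keeps the predecessor when the change is compatible).
  State merge(const State &Change) const {
    return Change.K == Kind::Uninitialized ? *this : Change;
  }
};

struct Block {
  std::vector<int> Preds, Succs;
  State Change;     // Phase 1 result: net VL/VTYPE effect of the block
  State Pred, Exit; // Phase 2 results
  bool InQueue = false;
};

// Phase 2: push every block once, then revisit successors whose predecessor
// exit state changed, until the exit states reach a fixed point.
static void computeIncoming(std::vector<Block> &CFG) {
  std::queue<int> Work;
  for (int I = 0, E = static_cast<int>(CFG.size()); I != E; ++I) {
    Work.push(I);
    CFG[I].InQueue = true;
  }
  while (!Work.empty()) {
    Block &B = CFG[Work.front()];
    Work.pop();
    B.InQueue = false;

    State In;
    if (B.Preds.empty())
      In = State(Kind::Unknown); // entry block: assume nothing about VL/VTYPE
    else
      for (int P : B.Preds)
        In = In.intersect(CFG[P].Exit);
    if (In.K == Kind::Uninitialized)
      continue; // no processed predecessor yet; a later revisit handles this

    B.Pred = In;
    State NewExit = In.merge(B.Change);
    if (NewExit == B.Exit)
      continue; // exit unchanged, successors need not be revisited
    B.Exit = NewExit;
    for (int S : B.Succs)
      if (!CFG[S].InQueue) {
        Work.push(S);
        CFG[S].InQueue = true;
      }
  }
}

int main() {
  // Diamond CFG 0 -> {1, 2} -> 3; both branches leave the same configuration,
  // so block 3 starts in a Known state and needs no leading vsetvli.
  std::vector<Block> CFG(4);
  CFG[0].Succs = {1, 2};
  CFG[1].Preds = {0};
  CFG[1].Succs = {3};
  CFG[1].Change = State(Kind::Known, 7);
  CFG[2].Preds = {0};
  CFG[2].Succs = {3};
  CFG[2].Change = State(Kind::Known, 7);
  CFG[3].Preds = {1, 2};
  computeIncoming(CFG);
  std::printf("entry state of block 3: %s\n",
              CFG[3].Pred.K == Kind::Known ? "known" : "unknown/uninitialized");
  return 0;
}

Running this on the diamond CFG in main() reports a Known entry state for the join block, which is the kind of redundancy the test updates below show disappearing: vsetvli/vsetivli instructions that merely re-established a configuration already guaranteed by every predecessor are no longer emitted.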
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -3667,11 +3667,10 @@ ; LMULMAX2-RV32-NEXT: addi a3, a1, 819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a2, 4112 -; LMULMAX2-RV32-NEXT: addi a2, a2, 257 +; LMULMAX2-RV32-NEXT: lui a1, 4112 +; LMULMAX2-RV32-NEXT: addi a2, a1, 257 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX2-RV32-NEXT: # %bb.1: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -3723,14 +3722,12 @@ ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB3_3: -; LMULMAX2-RV32-NEXT: sw a5, 0(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX2-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: sw a5, 0(sp) +; LMULMAX2-RV32-NEXT: bnez a1, .LBB3_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -3758,8 +3755,8 @@ ; LMULMAX2-RV32-NEXT: addi a1, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB3_6 ; LMULMAX2-RV32-NEXT: .LBB3_5: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -3902,11 +3899,10 @@ ; LMULMAX1-RV32-NEXT: addi a3, a1, 819 ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi a7, a1, -241 -; LMULMAX1-RV32-NEXT: lui a2, 4112 -; LMULMAX1-RV32-NEXT: addi a2, a2, 257 +; LMULMAX1-RV32-NEXT: lui a1, 4112 +; LMULMAX1-RV32-NEXT: addi a2, a1, 257 ; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_2 ; LMULMAX1-RV32-NEXT: # %bb.1: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 @@ -3958,14 +3954,12 @@ ; LMULMAX1-RV32-NEXT: mul a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a5, a1, 24 ; LMULMAX1-RV32-NEXT: .LBB3_3: -; LMULMAX1-RV32-NEXT: sw a5, 0(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 -; LMULMAX1-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5 +; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: sw a5, 0(sp) +; LMULMAX1-RV32-NEXT: bnez a1, .LBB3_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: srli a5, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 @@ -3993,8 +3987,8 @@ ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB3_6 ; LMULMAX1-RV32-NEXT: .LBB3_5: -; LMULMAX1-RV32-NEXT: srli a1, a5, 1 -; LMULMAX1-RV32-NEXT: or a1, a5, a1 +; LMULMAX1-RV32-NEXT: srli a5, a1, 1 +; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a5 ; LMULMAX1-RV32-NEXT: srli a5, a1, 4 @@ -11124,11 +11118,10 @@ ; LMULMAX2-RV32-NEXT: addi a3, a1, 819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; 
LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a2, 4112 -; LMULMAX2-RV32-NEXT: addi a2, a2, 257 +; LMULMAX2-RV32-NEXT: lui a1, 4112 +; LMULMAX2-RV32-NEXT: addi a2, a1, 257 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2 ; LMULMAX2-RV32-NEXT: # %bb.1: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -11180,14 +11173,12 @@ ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB7_3: -; LMULMAX2-RV32-NEXT: sw a5, 0(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 3 ; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30 +; LMULMAX2-RV32-NEXT: sw a5, 0(sp) +; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -11215,8 +11206,8 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_6 ; LMULMAX2-RV32-NEXT: .LBB7_5: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11239,14 +11230,12 @@ ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB7_6: -; LMULMAX2-RV32-NEXT: sw a5, 24(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2 ; LMULMAX2-RV32-NEXT: vsrl.vx v30, v28, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v30 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v30 +; LMULMAX2-RV32-NEXT: sw a5, 24(sp) +; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX2-RV32-NEXT: # %bb.7: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -11274,8 +11263,8 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_9 ; LMULMAX2-RV32-NEXT: .LBB7_8: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 +; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11298,14 +11287,12 @@ ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB7_9: -; LMULMAX2-RV32-NEXT: sw a5, 16(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1 ; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a6 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 +; LMULMAX2-RV32-NEXT: sw a5, 16(sp) +; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_11 ; LMULMAX2-RV32-NEXT: # %bb.10: -; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 @@ -11333,8 +11320,8 @@ ; LMULMAX2-RV32-NEXT: addi a1, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_12 ; LMULMAX2-RV32-NEXT: .LBB7_11: -; LMULMAX2-RV32-NEXT: srli a1, a5, 1 -; LMULMAX2-RV32-NEXT: or a1, a5, a1 +; LMULMAX2-RV32-NEXT: srli a5, a1, 1 
+; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 2 ; LMULMAX2-RV32-NEXT: or a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 4 @@ -11552,11 +11539,10 @@ ; LMULMAX1-RV32-NEXT: addi a4, a2, 819 ; LMULMAX1-RV32-NEXT: lui a2, 61681 ; LMULMAX1-RV32-NEXT: addi t0, a2, -241 -; LMULMAX1-RV32-NEXT: lui a3, 4112 -; LMULMAX1-RV32-NEXT: addi a3, a3, 257 +; LMULMAX1-RV32-NEXT: lui a2, 4112 +; LMULMAX1-RV32-NEXT: addi a3, a2, 257 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2 ; LMULMAX1-RV32-NEXT: # %bb.1: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 @@ -11608,14 +11594,12 @@ ; LMULMAX1-RV32-NEXT: mul a1, a1, a3 ; LMULMAX1-RV32-NEXT: srli a1, a1, 24 ; LMULMAX1-RV32-NEXT: .LBB7_3: -; LMULMAX1-RV32-NEXT: sw a1, 16(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v26, v26, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v27, v26, a7 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v27 -; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5 +; LMULMAX1-RV32-NEXT: vmv.x.s a2, v27 +; LMULMAX1-RV32-NEXT: sw a1, 16(sp) +; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 @@ -11643,8 +11627,8 @@ ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB7_6 ; LMULMAX1-RV32-NEXT: .LBB7_5: -; LMULMAX1-RV32-NEXT: srli a2, a1, 1 -; LMULMAX1-RV32-NEXT: or a1, a1, a2 +; LMULMAX1-RV32-NEXT: srli a1, a2, 1 +; LMULMAX1-RV32-NEXT: or a1, a2, a1 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 @@ -11669,13 +11653,11 @@ ; LMULMAX1-RV32-NEXT: .LBB7_6: ; LMULMAX1-RV32-NEXT: sw a1, 24(sp) ; LMULMAX1-RV32-NEXT: sw zero, 12(sp) -; LMULMAX1-RV32-NEXT: sw zero, 4(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX1-RV32-NEXT: sw zero, 4(sp) ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX1-RV32-NEXT: # %bb.7: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 @@ -11727,14 +11709,12 @@ ; LMULMAX1-RV32-NEXT: mul a1, a1, a3 ; LMULMAX1-RV32-NEXT: srli a1, a1, 24 ; LMULMAX1-RV32-NEXT: .LBB7_9: -; LMULMAX1-RV32-NEXT: sw a1, 0(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a7 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11 +; LMULMAX1-RV32-NEXT: vmv.x.s a2, v26 +; LMULMAX1-RV32-NEXT: sw a1, 0(sp) +; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_11 ; LMULMAX1-RV32-NEXT: # %bb.10: -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 @@ -11762,8 +11742,8 @@ ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB7_12 ; LMULMAX1-RV32-NEXT: .LBB7_11: -; LMULMAX1-RV32-NEXT: srli a2, a1, 1 -; LMULMAX1-RV32-NEXT: or a1, a1, a2 +; LMULMAX1-RV32-NEXT: srli a1, a2, 1 +; LMULMAX1-RV32-NEXT: or a1, a2, a1 ; LMULMAX1-RV32-NEXT: srli a2, a1, 2 ; LMULMAX1-RV32-NEXT: or a1, a1, a2 ; LMULMAX1-RV32-NEXT: srli a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll 
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -2592,7 +2592,6 @@ ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v25 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX2-RV32-NEXT: vsrl.vx v25, v25, a6 ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 @@ -2774,7 +2773,6 @@ ; LMULMAX1-RV32-NEXT: vmv.x.s a5, v25 ; LMULMAX1-RV32-NEXT: bnez a5, .LBB3_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: addi a5, a1, -1 @@ -7655,9 +7653,9 @@ ; LMULMAX2-RV32-NEXT: addi a3, a1, 819 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a7, a1, -241 -; LMULMAX2-RV32-NEXT: lui a2, 4112 +; LMULMAX2-RV32-NEXT: lui a1, 4112 ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX2-RV32-NEXT: addi a2, a2, 257 +; LMULMAX2-RV32-NEXT: addi a2, a1, 257 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_2 ; LMULMAX2-RV32-NEXT: # %bb.1: ; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu @@ -7703,7 +7701,6 @@ ; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 ; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_5 ; LMULMAX2-RV32-NEXT: # %bb.4: -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 @@ -7740,13 +7737,11 @@ ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB7_6: -; LMULMAX2-RV32-NEXT: sw a5, 24(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v28, v26, 2 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v28 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_8 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 +; LMULMAX2-RV32-NEXT: sw a5, 24(sp) +; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX2-RV32-NEXT: # %bb.7: -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vsrl.vx v28, v28, a6 ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v28 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 @@ -7767,9 +7762,9 @@ ; LMULMAX2-RV32-NEXT: addi a5, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_9 ; LMULMAX2-RV32-NEXT: .LBB7_8: -; LMULMAX2-RV32-NEXT: addi a1, a5, -1 -; LMULMAX2-RV32-NEXT: not a5, a5 -; LMULMAX2-RV32-NEXT: and a1, a5, a1 +; LMULMAX2-RV32-NEXT: addi a5, a1, -1 +; LMULMAX2-RV32-NEXT: not a1, a1 +; LMULMAX2-RV32-NEXT: and a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a5, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a5 @@ -7783,13 +7778,11 @@ ; LMULMAX2-RV32-NEXT: mul a1, a1, a2 ; LMULMAX2-RV32-NEXT: srli a5, a1, 24 ; LMULMAX2-RV32-NEXT: .LBB7_9: -; LMULMAX2-RV32-NEXT: sw a5, 16(sp) -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vslidedown.vi v26, v26, 1 -; LMULMAX2-RV32-NEXT: vmv.x.s a5, v26 -; LMULMAX2-RV32-NEXT: bnez a5, .LBB7_11 +; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 +; LMULMAX2-RV32-NEXT: sw a5, 16(sp) +; LMULMAX2-RV32-NEXT: bnez a1, .LBB7_11 ; LMULMAX2-RV32-NEXT: # %bb.10: -; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; LMULMAX2-RV32-NEXT: vsrl.vx v26, v26, a6 ; LMULMAX2-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX2-RV32-NEXT: addi a5, a1, -1 @@ -7810,9 +7803,9 @@ ; LMULMAX2-RV32-NEXT: addi a1, a1, 32 ; LMULMAX2-RV32-NEXT: j .LBB7_12 ; LMULMAX2-RV32-NEXT: .LBB7_11: -; LMULMAX2-RV32-NEXT: addi a1, a5, -1 -; LMULMAX2-RV32-NEXT: not a5, a5 -; LMULMAX2-RV32-NEXT: and a1, a5, a1 +; LMULMAX2-RV32-NEXT: addi a5, a1, -1 +; LMULMAX2-RV32-NEXT: not 
a1, a1 +; LMULMAX2-RV32-NEXT: and a1, a1, a5 ; LMULMAX2-RV32-NEXT: srli a5, a1, 1 ; LMULMAX2-RV32-NEXT: and a4, a5, a4 ; LMULMAX2-RV32-NEXT: sub a1, a1, a4 @@ -7978,9 +7971,9 @@ ; LMULMAX1-RV32-NEXT: addi a4, a1, 819 ; LMULMAX1-RV32-NEXT: lui a1, 61681 ; LMULMAX1-RV32-NEXT: addi t0, a1, -241 -; LMULMAX1-RV32-NEXT: lui a3, 4112 +; LMULMAX1-RV32-NEXT: lui a2, 4112 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 -; LMULMAX1-RV32-NEXT: addi a3, a3, 257 +; LMULMAX1-RV32-NEXT: addi a3, a2, 257 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_2 ; LMULMAX1-RV32-NEXT: # %bb.1: ; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu @@ -8026,7 +8019,6 @@ ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_5 ; LMULMAX1-RV32-NEXT: # %bb.4: -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v26, a6 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: addi a2, a1, -1 @@ -8065,12 +8057,10 @@ ; LMULMAX1-RV32-NEXT: .LBB7_6: ; LMULMAX1-RV32-NEXT: sw a1, 24(sp) ; LMULMAX1-RV32-NEXT: sw zero, 12(sp) -; LMULMAX1-RV32-NEXT: sw zero, 4(sp) -; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 +; LMULMAX1-RV32-NEXT: sw zero, 4(sp) ; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_8 ; LMULMAX1-RV32-NEXT: # %bb.7: -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v26, v25, a6 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v26 ; LMULMAX1-RV32-NEXT: addi a2, a1, -1 @@ -8107,13 +8097,11 @@ ; LMULMAX1-RV32-NEXT: mul a1, a1, a3 ; LMULMAX1-RV32-NEXT: srli a1, a1, 24 ; LMULMAX1-RV32-NEXT: .LBB7_9: -; LMULMAX1-RV32-NEXT: sw a1, 0(sp) -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vslidedown.vi v25, v25, 1 -; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 -; LMULMAX1-RV32-NEXT: bnez a1, .LBB7_11 +; LMULMAX1-RV32-NEXT: vmv.x.s a2, v25 +; LMULMAX1-RV32-NEXT: sw a1, 0(sp) +; LMULMAX1-RV32-NEXT: bnez a2, .LBB7_11 ; LMULMAX1-RV32-NEXT: # %bb.10: -; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e64,m1,ta,mu ; LMULMAX1-RV32-NEXT: vsrl.vx v25, v25, a6 ; LMULMAX1-RV32-NEXT: vmv.x.s a1, v25 ; LMULMAX1-RV32-NEXT: addi a2, a1, -1 @@ -8134,9 +8122,9 @@ ; LMULMAX1-RV32-NEXT: addi a1, a1, 32 ; LMULMAX1-RV32-NEXT: j .LBB7_12 ; LMULMAX1-RV32-NEXT: .LBB7_11: -; LMULMAX1-RV32-NEXT: addi a2, a1, -1 -; LMULMAX1-RV32-NEXT: not a1, a1 -; LMULMAX1-RV32-NEXT: and a1, a1, a2 +; LMULMAX1-RV32-NEXT: addi a1, a2, -1 +; LMULMAX1-RV32-NEXT: not a2, a2 +; LMULMAX1-RV32-NEXT: and a1, a2, a1 ; LMULMAX1-RV32-NEXT: srli a2, a1, 1 ; LMULMAX1-RV32-NEXT: and a2, a2, a5 ; LMULMAX1-RV32-NEXT: sub a1, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -3959,7 +3959,6 @@ ; LMULMAX2-NEXT: vsetivli a2, 4, e64,m2,ta,mu ; LMULMAX2-NEXT: vle64.v v26, (a0) ; LMULMAX2-NEXT: vle64.v v28, (a1) -; LMULMAX2-NEXT: vsetivli a1, 4, e64,m2,ta,mu ; LMULMAX2-NEXT: vadd.vv v26, v26, v28 ; LMULMAX2-NEXT: vse64.v v26, (a0) ; LMULMAX2-NEXT: ret @@ -3973,7 +3972,6 @@ ; LMULMAX1-NEXT: vle64.v v27, (a1) ; LMULMAX1-NEXT: addi a1, a1, 16 ; LMULMAX1-NEXT: vle64.v v28, (a1) -; LMULMAX1-NEXT: vsetivli a1, 2, e64,m1,ta,mu ; LMULMAX1-NEXT: vadd.vv v26, v26, v28 ; LMULMAX1-NEXT: vadd.vv v25, v25, v27 ; LMULMAX1-NEXT: vse64.v v25, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll @@ -47,14 +47,11 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: bnez a0, .LBB1_5 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v9 ; CHECK-NEXT: j .LBB1_6 ; CHECK-NEXT: .LBB1_5: -; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: .LBB1_6: -; CHECK-NEXT: vsetivli a0, 2, e16,mf4,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vmv1r.v v8, v25 ; CHECK-NEXT: ret @@ -91,22 +88,18 @@ ; CHECK-NEXT: fsh ft0, 14(sp) ; CHECK-NEXT: bnez a0, .LBB2_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB2_8 ; CHECK-NEXT: .LBB2_7: -; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB2_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 12(sp) ; CHECK-NEXT: bnez a0, .LBB2_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB2_11 ; CHECK-NEXT: .LBB2_10: -; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB2_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -149,22 +142,18 @@ ; CHECK-NEXT: fsh ft0, 14(sp) ; CHECK-NEXT: bnez a0, .LBB3_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB3_8 ; CHECK-NEXT: .LBB3_7: -; CHECK-NEXT: vsetivli a1, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB3_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 12(sp) ; CHECK-NEXT: bnez a0, .LBB3_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB3_11 ; CHECK-NEXT: .LBB3_10: -; CHECK-NEXT: vsetivli a0, 1, e16,mf2,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB3_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -207,66 +196,54 @@ ; CHECK-NEXT: fsh ft0, 14(sp) ; CHECK-NEXT: bnez a0, .LBB4_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 6 ; CHECK-NEXT: j .LBB4_8 ; CHECK-NEXT: .LBB4_7: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 6 ; CHECK-NEXT: .LBB4_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 12(sp) ; CHECK-NEXT: bnez a0, .LBB4_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 5 ; CHECK-NEXT: j .LBB4_11 ; CHECK-NEXT: .LBB4_10: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 5 ; CHECK-NEXT: .LBB4_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 10(sp) ; CHECK-NEXT: bnez a0, .LBB4_13 ; CHECK-NEXT: # %bb.12: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 4 ; CHECK-NEXT: j .LBB4_14 ; CHECK-NEXT: .LBB4_13: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 4 ; CHECK-NEXT: .LBB4_14: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 8(sp) ; CHECK-NEXT: bnez a0, .LBB4_16 ; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 ; CHECK-NEXT: j .LBB4_17 ; CHECK-NEXT: .LBB4_16: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: .LBB4_17: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 6(sp) ; CHECK-NEXT: bnez a0, .LBB4_19 ; CHECK-NEXT: # %bb.18: -; CHECK-NEXT: vsetivli a1, 1, 
e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB4_20 ; CHECK-NEXT: .LBB4_19: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB4_20: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 4(sp) ; CHECK-NEXT: bnez a0, .LBB4_22 ; CHECK-NEXT: # %bb.21: -; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB4_23 ; CHECK-NEXT: .LBB4_22: -; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB4_23: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -308,66 +285,54 @@ ; CHECK-NEXT: fsh ft0, 14(sp) ; CHECK-NEXT: bnez a0, .LBB5_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 6 ; CHECK-NEXT: j .LBB5_8 ; CHECK-NEXT: .LBB5_7: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 6 ; CHECK-NEXT: .LBB5_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 12(sp) ; CHECK-NEXT: bnez a0, .LBB5_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 5 ; CHECK-NEXT: j .LBB5_11 ; CHECK-NEXT: .LBB5_10: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 5 ; CHECK-NEXT: .LBB5_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 10(sp) ; CHECK-NEXT: bnez a0, .LBB5_13 ; CHECK-NEXT: # %bb.12: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 4 ; CHECK-NEXT: j .LBB5_14 ; CHECK-NEXT: .LBB5_13: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 4 ; CHECK-NEXT: .LBB5_14: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 8(sp) ; CHECK-NEXT: bnez a0, .LBB5_16 ; CHECK-NEXT: # %bb.15: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 3 ; CHECK-NEXT: j .LBB5_17 ; CHECK-NEXT: .LBB5_16: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 3 ; CHECK-NEXT: .LBB5_17: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 6(sp) ; CHECK-NEXT: bnez a0, .LBB5_19 ; CHECK-NEXT: # %bb.18: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB5_20 ; CHECK-NEXT: .LBB5_19: -; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB5_20: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsh ft0, 4(sp) ; CHECK-NEXT: bnez a0, .LBB5_22 ; CHECK-NEXT: # %bb.21: -; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB5_23 ; CHECK-NEXT: .LBB5_22: -; CHECK-NEXT: vsetivli a0, 1, e16,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB5_23: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -416,154 +381,126 @@ ; RV32-NEXT: fsh ft0, 30(sp) ; RV32-NEXT: bnez a0, .LBB6_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 14 ; RV32-NEXT: j .LBB6_8 ; RV32-NEXT: .LBB6_7: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 14 ; RV32-NEXT: .LBB6_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB6_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 13 ; RV32-NEXT: j .LBB6_11 ; RV32-NEXT: .LBB6_10: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 13 ; RV32-NEXT: .LBB6_11: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 26(sp) ; RV32-NEXT: bnez a0, .LBB6_13 ; RV32-NEXT: # %bb.12: -; 
RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 12 ; RV32-NEXT: j .LBB6_14 ; RV32-NEXT: .LBB6_13: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 12 ; RV32-NEXT: .LBB6_14: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB6_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 11 ; RV32-NEXT: j .LBB6_17 ; RV32-NEXT: .LBB6_16: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 11 ; RV32-NEXT: .LBB6_17: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 22(sp) ; RV32-NEXT: bnez a0, .LBB6_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 10 ; RV32-NEXT: j .LBB6_20 ; RV32-NEXT: .LBB6_19: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 10 ; RV32-NEXT: .LBB6_20: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB6_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 9 ; RV32-NEXT: j .LBB6_23 ; RV32-NEXT: .LBB6_22: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 9 ; RV32-NEXT: .LBB6_23: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 18(sp) ; RV32-NEXT: bnez a0, .LBB6_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 8 ; RV32-NEXT: j .LBB6_26 ; RV32-NEXT: .LBB6_25: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 8 ; RV32-NEXT: .LBB6_26: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB6_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 7 ; RV32-NEXT: j .LBB6_29 ; RV32-NEXT: .LBB6_28: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 7 ; RV32-NEXT: .LBB6_29: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 14(sp) ; RV32-NEXT: bnez a0, .LBB6_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 ; RV32-NEXT: j .LBB6_32 ; RV32-NEXT: .LBB6_31: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 ; RV32-NEXT: .LBB6_32: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB6_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 ; RV32-NEXT: j .LBB6_35 ; RV32-NEXT: .LBB6_34: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 ; RV32-NEXT: .LBB6_35: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 10(sp) ; RV32-NEXT: bnez a0, .LBB6_37 ; RV32-NEXT: # %bb.36: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 ; RV32-NEXT: j .LBB6_38 ; RV32-NEXT: .LBB6_37: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 ; RV32-NEXT: .LBB6_38: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB6_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 ; RV32-NEXT: j .LBB6_41 ; RV32-NEXT: .LBB6_40: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 ; RV32-NEXT: .LBB6_41: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 6(sp) ; RV32-NEXT: bnez a0, .LBB6_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; 
RV32-NEXT: j .LBB6_44 ; RV32-NEXT: .LBB6_43: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 2 ; RV32-NEXT: .LBB6_44: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 4(sp) ; RV32-NEXT: bnez a0, .LBB6_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB6_47 ; RV32-NEXT: .LBB6_46: -; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB6_47: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -610,154 +547,126 @@ ; RV64-NEXT: fsh ft0, 30(sp) ; RV64-NEXT: bnez a0, .LBB6_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 14 ; RV64-NEXT: j .LBB6_8 ; RV64-NEXT: .LBB6_7: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 14 ; RV64-NEXT: .LBB6_8: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB6_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 13 ; RV64-NEXT: j .LBB6_11 ; RV64-NEXT: .LBB6_10: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 13 ; RV64-NEXT: .LBB6_11: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 26(sp) ; RV64-NEXT: bnez a0, .LBB6_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 12 ; RV64-NEXT: j .LBB6_14 ; RV64-NEXT: .LBB6_13: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 12 ; RV64-NEXT: .LBB6_14: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB6_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 11 ; RV64-NEXT: j .LBB6_17 ; RV64-NEXT: .LBB6_16: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 11 ; RV64-NEXT: .LBB6_17: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 22(sp) ; RV64-NEXT: bnez a0, .LBB6_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 10 ; RV64-NEXT: j .LBB6_20 ; RV64-NEXT: .LBB6_19: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 10 ; RV64-NEXT: .LBB6_20: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB6_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 9 ; RV64-NEXT: j .LBB6_23 ; RV64-NEXT: .LBB6_22: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 9 ; RV64-NEXT: .LBB6_23: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 18(sp) ; RV64-NEXT: bnez a0, .LBB6_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 8 ; RV64-NEXT: j .LBB6_26 ; RV64-NEXT: .LBB6_25: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 8 ; RV64-NEXT: .LBB6_26: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB6_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 7 ; RV64-NEXT: j .LBB6_29 ; RV64-NEXT: .LBB6_28: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 7 ; RV64-NEXT: .LBB6_29: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 14(sp) ; RV64-NEXT: bnez a0, .LBB6_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 ; RV64-NEXT: j .LBB6_32 ; RV64-NEXT: .LBB6_31: -; 
RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 ; RV64-NEXT: .LBB6_32: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB6_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 ; RV64-NEXT: j .LBB6_35 ; RV64-NEXT: .LBB6_34: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 ; RV64-NEXT: .LBB6_35: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 10(sp) ; RV64-NEXT: bnez a0, .LBB6_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 ; RV64-NEXT: j .LBB6_38 ; RV64-NEXT: .LBB6_37: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 ; RV64-NEXT: .LBB6_38: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB6_40 ; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 ; RV64-NEXT: j .LBB6_41 ; RV64-NEXT: .LBB6_40: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 ; RV64-NEXT: .LBB6_41: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 6(sp) ; RV64-NEXT: bnez a0, .LBB6_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; RV64-NEXT: j .LBB6_44 ; RV64-NEXT: .LBB6_43: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB6_44: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 4(sp) ; RV64-NEXT: bnez a0, .LBB6_46 ; RV64-NEXT: # %bb.45: -; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB6_47 ; RV64-NEXT: .LBB6_46: -; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 ; RV64-NEXT: .LBB6_47: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -809,154 +718,126 @@ ; RV32-NEXT: fsh ft0, 30(sp) ; RV32-NEXT: bnez a0, .LBB7_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 14 ; RV32-NEXT: j .LBB7_8 ; RV32-NEXT: .LBB7_7: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 14 ; RV32-NEXT: .LBB7_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB7_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 13 ; RV32-NEXT: j .LBB7_11 ; RV32-NEXT: .LBB7_10: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 13 ; RV32-NEXT: .LBB7_11: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 26(sp) ; RV32-NEXT: bnez a0, .LBB7_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 12 ; RV32-NEXT: j .LBB7_14 ; RV32-NEXT: .LBB7_13: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 12 ; RV32-NEXT: .LBB7_14: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB7_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 11 ; RV32-NEXT: j .LBB7_17 ; RV32-NEXT: .LBB7_16: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 11 ; RV32-NEXT: .LBB7_17: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 22(sp) ; RV32-NEXT: bnez a0, .LBB7_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 10 ; RV32-NEXT: j .LBB7_20 ; RV32-NEXT: .LBB7_19: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: 
vslidedown.vi v26, v8, 10 ; RV32-NEXT: .LBB7_20: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB7_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 9 ; RV32-NEXT: j .LBB7_23 ; RV32-NEXT: .LBB7_22: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 9 ; RV32-NEXT: .LBB7_23: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 18(sp) ; RV32-NEXT: bnez a0, .LBB7_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 8 ; RV32-NEXT: j .LBB7_26 ; RV32-NEXT: .LBB7_25: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 8 ; RV32-NEXT: .LBB7_26: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB7_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 7 ; RV32-NEXT: j .LBB7_29 ; RV32-NEXT: .LBB7_28: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 7 ; RV32-NEXT: .LBB7_29: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 14(sp) ; RV32-NEXT: bnez a0, .LBB7_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 ; RV32-NEXT: j .LBB7_32 ; RV32-NEXT: .LBB7_31: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 ; RV32-NEXT: .LBB7_32: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB7_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 ; RV32-NEXT: j .LBB7_35 ; RV32-NEXT: .LBB7_34: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 ; RV32-NEXT: .LBB7_35: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 10(sp) ; RV32-NEXT: bnez a0, .LBB7_37 ; RV32-NEXT: # %bb.36: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 ; RV32-NEXT: j .LBB7_38 ; RV32-NEXT: .LBB7_37: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 ; RV32-NEXT: .LBB7_38: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB7_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 ; RV32-NEXT: j .LBB7_41 ; RV32-NEXT: .LBB7_40: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 ; RV32-NEXT: .LBB7_41: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 6(sp) ; RV32-NEXT: bnez a0, .LBB7_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; RV32-NEXT: j .LBB7_44 ; RV32-NEXT: .LBB7_43: -; RV32-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 2 ; RV32-NEXT: .LBB7_44: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsh ft0, 4(sp) ; RV32-NEXT: bnez a0, .LBB7_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB7_47 ; RV32-NEXT: .LBB7_46: -; RV32-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB7_47: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -1004,154 +885,126 @@ ; RV64-NEXT: fsh ft0, 30(sp) ; RV64-NEXT: bnez a0, .LBB7_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 14 ; RV64-NEXT: j .LBB7_8 ; RV64-NEXT: .LBB7_7: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 14 ; RV64-NEXT: .LBB7_8: ; RV64-NEXT: 
vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB7_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 13 ; RV64-NEXT: j .LBB7_11 ; RV64-NEXT: .LBB7_10: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 13 ; RV64-NEXT: .LBB7_11: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 26(sp) ; RV64-NEXT: bnez a0, .LBB7_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 12 ; RV64-NEXT: j .LBB7_14 ; RV64-NEXT: .LBB7_13: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 12 ; RV64-NEXT: .LBB7_14: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB7_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 11 ; RV64-NEXT: j .LBB7_17 ; RV64-NEXT: .LBB7_16: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 11 ; RV64-NEXT: .LBB7_17: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 22(sp) ; RV64-NEXT: bnez a0, .LBB7_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 10 ; RV64-NEXT: j .LBB7_20 ; RV64-NEXT: .LBB7_19: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 10 ; RV64-NEXT: .LBB7_20: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB7_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 9 ; RV64-NEXT: j .LBB7_23 ; RV64-NEXT: .LBB7_22: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 9 ; RV64-NEXT: .LBB7_23: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 18(sp) ; RV64-NEXT: bnez a0, .LBB7_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 8 ; RV64-NEXT: j .LBB7_26 ; RV64-NEXT: .LBB7_25: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 8 ; RV64-NEXT: .LBB7_26: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB7_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 7 ; RV64-NEXT: j .LBB7_29 ; RV64-NEXT: .LBB7_28: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 7 ; RV64-NEXT: .LBB7_29: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 14(sp) ; RV64-NEXT: bnez a0, .LBB7_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 ; RV64-NEXT: j .LBB7_32 ; RV64-NEXT: .LBB7_31: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 ; RV64-NEXT: .LBB7_32: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB7_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 ; RV64-NEXT: j .LBB7_35 ; RV64-NEXT: .LBB7_34: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 ; RV64-NEXT: .LBB7_35: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 10(sp) ; RV64-NEXT: bnez a0, .LBB7_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 ; RV64-NEXT: j .LBB7_38 ; RV64-NEXT: .LBB7_37: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 ; RV64-NEXT: .LBB7_38: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB7_40 
; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 ; RV64-NEXT: j .LBB7_41 ; RV64-NEXT: .LBB7_40: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 ; RV64-NEXT: .LBB7_41: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 6(sp) ; RV64-NEXT: bnez a0, .LBB7_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; RV64-NEXT: j .LBB7_44 ; RV64-NEXT: .LBB7_43: -; RV64-NEXT: vsetivli a1, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB7_44: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsh ft0, 4(sp) ; RV64-NEXT: bnez a0, .LBB7_46 ; RV64-NEXT: # %bb.45: -; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB7_47 ; RV64-NEXT: .LBB7_46: -; RV64-NEXT: vsetivli a0, 1, e16,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 ; RV64-NEXT: .LBB7_47: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -1211,14 +1064,11 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: bnez a0, .LBB9_5 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v9 ; CHECK-NEXT: j .LBB9_6 ; CHECK-NEXT: .LBB9_5: -; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: .LBB9_6: -; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vmv1r.v v8, v25 ; CHECK-NEXT: ret @@ -1255,22 +1105,18 @@ ; CHECK-NEXT: fsw ft0, 12(sp) ; CHECK-NEXT: bnez a0, .LBB10_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB10_8 ; CHECK-NEXT: .LBB10_7: -; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB10_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsw ft0, 8(sp) ; CHECK-NEXT: bnez a0, .LBB10_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB10_11 ; CHECK-NEXT: .LBB10_10: -; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB10_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -1312,22 +1158,18 @@ ; CHECK-NEXT: fsw ft0, 12(sp) ; CHECK-NEXT: bnez a0, .LBB11_7 ; CHECK-NEXT: # %bb.6: -; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 2 ; CHECK-NEXT: j .LBB11_8 ; CHECK-NEXT: .LBB11_7: -; CHECK-NEXT: vsetivli a1, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 2 ; CHECK-NEXT: .LBB11_8: ; CHECK-NEXT: vfmv.f.s ft0, v25 ; CHECK-NEXT: fsw ft0, 8(sp) ; CHECK-NEXT: bnez a0, .LBB11_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v9, 1 ; CHECK-NEXT: j .LBB11_11 ; CHECK-NEXT: .LBB11_10: -; CHECK-NEXT: vsetivli a0, 1, e32,m1,ta,mu ; CHECK-NEXT: vslidedown.vi v25, v8, 1 ; CHECK-NEXT: .LBB11_11: ; CHECK-NEXT: vfmv.f.s ft0, v25 @@ -1376,66 +1218,54 @@ ; RV32-NEXT: fsw ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB12_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 ; RV32-NEXT: j .LBB12_8 ; RV32-NEXT: .LBB12_7: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 ; RV32-NEXT: .LBB12_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB12_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 ; RV32-NEXT: j .LBB12_11 ; RV32-NEXT: .LBB12_10: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; 
RV32-NEXT: vslidedown.vi v26, v8, 5 ; RV32-NEXT: .LBB12_11: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB12_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 ; RV32-NEXT: j .LBB12_14 ; RV32-NEXT: .LBB12_13: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 ; RV32-NEXT: .LBB12_14: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB12_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 ; RV32-NEXT: j .LBB12_17 ; RV32-NEXT: .LBB12_16: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 ; RV32-NEXT: .LBB12_17: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB12_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; RV32-NEXT: j .LBB12_20 ; RV32-NEXT: .LBB12_19: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 2 ; RV32-NEXT: .LBB12_20: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB12_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB12_23 ; RV32-NEXT: .LBB12_22: -; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB12_23: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -1482,66 +1312,54 @@ ; RV64-NEXT: fsw ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB12_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 ; RV64-NEXT: j .LBB12_8 ; RV64-NEXT: .LBB12_7: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 ; RV64-NEXT: .LBB12_8: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB12_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 ; RV64-NEXT: j .LBB12_11 ; RV64-NEXT: .LBB12_10: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 ; RV64-NEXT: .LBB12_11: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB12_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 ; RV64-NEXT: j .LBB12_14 ; RV64-NEXT: .LBB12_13: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 ; RV64-NEXT: .LBB12_14: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB12_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 ; RV64-NEXT: j .LBB12_17 ; RV64-NEXT: .LBB12_16: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 3 ; RV64-NEXT: .LBB12_17: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB12_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; RV64-NEXT: j .LBB12_20 ; RV64-NEXT: .LBB12_19: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB12_20: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB12_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB12_23 ; RV64-NEXT: .LBB12_22: -; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi 
v26, v8, 1 ; RV64-NEXT: .LBB12_23: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -1593,66 +1411,54 @@ ; RV32-NEXT: fsw ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB13_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 6 ; RV32-NEXT: j .LBB13_8 ; RV32-NEXT: .LBB13_7: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 6 ; RV32-NEXT: .LBB13_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB13_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 5 ; RV32-NEXT: j .LBB13_11 ; RV32-NEXT: .LBB13_10: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 5 ; RV32-NEXT: .LBB13_11: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB13_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 4 ; RV32-NEXT: j .LBB13_14 ; RV32-NEXT: .LBB13_13: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 4 ; RV32-NEXT: .LBB13_14: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB13_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 3 ; RV32-NEXT: j .LBB13_17 ; RV32-NEXT: .LBB13_16: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 3 ; RV32-NEXT: .LBB13_17: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB13_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; RV32-NEXT: j .LBB13_20 ; RV32-NEXT: .LBB13_19: -; RV32-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 2 ; RV32-NEXT: .LBB13_20: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsw ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB13_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB13_23 ; RV32-NEXT: .LBB13_22: -; RV32-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB13_23: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -1700,66 +1506,54 @@ ; RV64-NEXT: fsw ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB13_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 6 ; RV64-NEXT: j .LBB13_8 ; RV64-NEXT: .LBB13_7: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 6 ; RV64-NEXT: .LBB13_8: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB13_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 5 ; RV64-NEXT: j .LBB13_11 ; RV64-NEXT: .LBB13_10: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 5 ; RV64-NEXT: .LBB13_11: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB13_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 4 ; RV64-NEXT: j .LBB13_14 ; RV64-NEXT: .LBB13_13: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 4 ; RV64-NEXT: .LBB13_14: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB13_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 3 ; RV64-NEXT: j .LBB13_17 ; RV64-NEXT: .LBB13_16: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 
3 ; RV64-NEXT: .LBB13_17: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB13_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; RV64-NEXT: j .LBB13_20 ; RV64-NEXT: .LBB13_19: -; RV64-NEXT: vsetivli a1, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB13_20: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsw ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB13_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB13_23 ; RV64-NEXT: .LBB13_22: -; RV64-NEXT: vsetivli a0, 1, e32,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 ; RV64-NEXT: .LBB13_23: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -1811,154 +1605,126 @@ ; RV32-NEXT: fsw ft0, 60(sp) ; RV32-NEXT: bnez a0, .LBB14_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 14 ; RV32-NEXT: j .LBB14_8 ; RV32-NEXT: .LBB14_7: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 14 ; RV32-NEXT: .LBB14_8: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB14_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 13 ; RV32-NEXT: j .LBB14_11 ; RV32-NEXT: .LBB14_10: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 13 ; RV32-NEXT: .LBB14_11: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 52(sp) ; RV32-NEXT: bnez a0, .LBB14_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 12 ; RV32-NEXT: j .LBB14_14 ; RV32-NEXT: .LBB14_13: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 12 ; RV32-NEXT: .LBB14_14: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB14_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 11 ; RV32-NEXT: j .LBB14_17 ; RV32-NEXT: .LBB14_16: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 11 ; RV32-NEXT: .LBB14_17: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 44(sp) ; RV32-NEXT: bnez a0, .LBB14_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 10 ; RV32-NEXT: j .LBB14_20 ; RV32-NEXT: .LBB14_19: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 10 ; RV32-NEXT: .LBB14_20: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB14_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 9 ; RV32-NEXT: j .LBB14_23 ; RV32-NEXT: .LBB14_22: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 9 ; RV32-NEXT: .LBB14_23: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 36(sp) ; RV32-NEXT: bnez a0, .LBB14_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 8 ; RV32-NEXT: j .LBB14_26 ; RV32-NEXT: .LBB14_25: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 8 ; RV32-NEXT: .LBB14_26: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB14_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 ; RV32-NEXT: j .LBB14_29 ; RV32-NEXT: .LBB14_28: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 7 ; RV32-NEXT: 
.LBB14_29: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB14_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 ; RV32-NEXT: j .LBB14_32 ; RV32-NEXT: .LBB14_31: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 ; RV32-NEXT: .LBB14_32: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB14_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 ; RV32-NEXT: j .LBB14_35 ; RV32-NEXT: .LBB14_34: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 ; RV32-NEXT: .LBB14_35: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 20(sp) ; RV32-NEXT: bnez a0, .LBB14_37 ; RV32-NEXT: # %bb.36: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 ; RV32-NEXT: j .LBB14_38 ; RV32-NEXT: .LBB14_37: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 ; RV32-NEXT: .LBB14_38: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB14_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 ; RV32-NEXT: j .LBB14_41 ; RV32-NEXT: .LBB14_40: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 ; RV32-NEXT: .LBB14_41: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB14_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 ; RV32-NEXT: j .LBB14_44 ; RV32-NEXT: .LBB14_43: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 2 ; RV32-NEXT: .LBB14_44: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB14_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 1 ; RV32-NEXT: j .LBB14_47 ; RV32-NEXT: .LBB14_46: -; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 ; RV32-NEXT: .LBB14_47: ; RV32-NEXT: vfmv.f.s ft0, v28 @@ -2005,154 +1771,126 @@ ; RV64-NEXT: fsw ft0, 60(sp) ; RV64-NEXT: bnez a0, .LBB14_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 14 ; RV64-NEXT: j .LBB14_8 ; RV64-NEXT: .LBB14_7: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 14 ; RV64-NEXT: .LBB14_8: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB14_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 13 ; RV64-NEXT: j .LBB14_11 ; RV64-NEXT: .LBB14_10: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 13 ; RV64-NEXT: .LBB14_11: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 52(sp) ; RV64-NEXT: bnez a0, .LBB14_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 12 ; RV64-NEXT: j .LBB14_14 ; RV64-NEXT: .LBB14_13: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 12 ; RV64-NEXT: .LBB14_14: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB14_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 11 ; RV64-NEXT: j .LBB14_17 ; RV64-NEXT: .LBB14_16: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 11 ; RV64-NEXT: .LBB14_17: ; 
RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 44(sp) ; RV64-NEXT: bnez a0, .LBB14_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 10 ; RV64-NEXT: j .LBB14_20 ; RV64-NEXT: .LBB14_19: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 10 ; RV64-NEXT: .LBB14_20: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB14_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 9 ; RV64-NEXT: j .LBB14_23 ; RV64-NEXT: .LBB14_22: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 9 ; RV64-NEXT: .LBB14_23: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 36(sp) ; RV64-NEXT: bnez a0, .LBB14_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 8 ; RV64-NEXT: j .LBB14_26 ; RV64-NEXT: .LBB14_25: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 8 ; RV64-NEXT: .LBB14_26: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB14_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 7 ; RV64-NEXT: j .LBB14_29 ; RV64-NEXT: .LBB14_28: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 7 ; RV64-NEXT: .LBB14_29: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB14_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 ; RV64-NEXT: j .LBB14_32 ; RV64-NEXT: .LBB14_31: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 ; RV64-NEXT: .LBB14_32: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB14_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 ; RV64-NEXT: j .LBB14_35 ; RV64-NEXT: .LBB14_34: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 ; RV64-NEXT: .LBB14_35: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB14_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 ; RV64-NEXT: j .LBB14_38 ; RV64-NEXT: .LBB14_37: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 ; RV64-NEXT: .LBB14_38: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB14_40 ; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 ; RV64-NEXT: j .LBB14_41 ; RV64-NEXT: .LBB14_40: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 ; RV64-NEXT: .LBB14_41: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB14_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 ; RV64-NEXT: j .LBB14_44 ; RV64-NEXT: .LBB14_43: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 2 ; RV64-NEXT: .LBB14_44: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB14_46 ; RV64-NEXT: # %bb.45: -; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 1 ; RV64-NEXT: j .LBB14_47 ; RV64-NEXT: .LBB14_46: -; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 ; RV64-NEXT: .LBB14_47: ; RV64-NEXT: vfmv.f.s ft0, v28 @@ -2204,154 
+1942,126 @@ ; RV32-NEXT: fsw ft0, 60(sp) ; RV32-NEXT: bnez a0, .LBB15_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 14 ; RV32-NEXT: j .LBB15_8 ; RV32-NEXT: .LBB15_7: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 14 ; RV32-NEXT: .LBB15_8: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB15_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 13 ; RV32-NEXT: j .LBB15_11 ; RV32-NEXT: .LBB15_10: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 13 ; RV32-NEXT: .LBB15_11: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 52(sp) ; RV32-NEXT: bnez a0, .LBB15_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 12 ; RV32-NEXT: j .LBB15_14 ; RV32-NEXT: .LBB15_13: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 12 ; RV32-NEXT: .LBB15_14: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB15_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 11 ; RV32-NEXT: j .LBB15_17 ; RV32-NEXT: .LBB15_16: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 11 ; RV32-NEXT: .LBB15_17: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 44(sp) ; RV32-NEXT: bnez a0, .LBB15_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 10 ; RV32-NEXT: j .LBB15_20 ; RV32-NEXT: .LBB15_19: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 10 ; RV32-NEXT: .LBB15_20: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB15_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 9 ; RV32-NEXT: j .LBB15_23 ; RV32-NEXT: .LBB15_22: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 9 ; RV32-NEXT: .LBB15_23: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 36(sp) ; RV32-NEXT: bnez a0, .LBB15_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 8 ; RV32-NEXT: j .LBB15_26 ; RV32-NEXT: .LBB15_25: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 8 ; RV32-NEXT: .LBB15_26: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB15_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 7 ; RV32-NEXT: j .LBB15_29 ; RV32-NEXT: .LBB15_28: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 7 ; RV32-NEXT: .LBB15_29: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 28(sp) ; RV32-NEXT: bnez a0, .LBB15_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 ; RV32-NEXT: j .LBB15_32 ; RV32-NEXT: .LBB15_31: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 ; RV32-NEXT: .LBB15_32: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB15_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 ; RV32-NEXT: j .LBB15_35 ; RV32-NEXT: .LBB15_34: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 ; RV32-NEXT: .LBB15_35: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 
20(sp) ; RV32-NEXT: bnez a0, .LBB15_37 ; RV32-NEXT: # %bb.36: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 ; RV32-NEXT: j .LBB15_38 ; RV32-NEXT: .LBB15_37: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 ; RV32-NEXT: .LBB15_38: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB15_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 ; RV32-NEXT: j .LBB15_41 ; RV32-NEXT: .LBB15_40: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 ; RV32-NEXT: .LBB15_41: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 12(sp) ; RV32-NEXT: bnez a0, .LBB15_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 ; RV32-NEXT: j .LBB15_44 ; RV32-NEXT: .LBB15_43: -; RV32-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 2 ; RV32-NEXT: .LBB15_44: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsw ft0, 8(sp) ; RV32-NEXT: bnez a0, .LBB15_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 1 ; RV32-NEXT: j .LBB15_47 ; RV32-NEXT: .LBB15_46: -; RV32-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 ; RV32-NEXT: .LBB15_47: ; RV32-NEXT: vfmv.f.s ft0, v28 @@ -2399,154 +2109,126 @@ ; RV64-NEXT: fsw ft0, 60(sp) ; RV64-NEXT: bnez a0, .LBB15_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 14 ; RV64-NEXT: j .LBB15_8 ; RV64-NEXT: .LBB15_7: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 14 ; RV64-NEXT: .LBB15_8: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB15_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 13 ; RV64-NEXT: j .LBB15_11 ; RV64-NEXT: .LBB15_10: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 13 ; RV64-NEXT: .LBB15_11: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 52(sp) ; RV64-NEXT: bnez a0, .LBB15_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 12 ; RV64-NEXT: j .LBB15_14 ; RV64-NEXT: .LBB15_13: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 12 ; RV64-NEXT: .LBB15_14: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB15_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 11 ; RV64-NEXT: j .LBB15_17 ; RV64-NEXT: .LBB15_16: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 11 ; RV64-NEXT: .LBB15_17: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 44(sp) ; RV64-NEXT: bnez a0, .LBB15_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 10 ; RV64-NEXT: j .LBB15_20 ; RV64-NEXT: .LBB15_19: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 10 ; RV64-NEXT: .LBB15_20: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB15_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 9 ; RV64-NEXT: j .LBB15_23 ; RV64-NEXT: .LBB15_22: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 9 ; RV64-NEXT: .LBB15_23: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 36(sp) ; 
RV64-NEXT: bnez a0, .LBB15_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 8 ; RV64-NEXT: j .LBB15_26 ; RV64-NEXT: .LBB15_25: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 8 ; RV64-NEXT: .LBB15_26: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB15_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 7 ; RV64-NEXT: j .LBB15_29 ; RV64-NEXT: .LBB15_28: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 7 ; RV64-NEXT: .LBB15_29: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 28(sp) ; RV64-NEXT: bnez a0, .LBB15_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 ; RV64-NEXT: j .LBB15_32 ; RV64-NEXT: .LBB15_31: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 ; RV64-NEXT: .LBB15_32: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB15_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 ; RV64-NEXT: j .LBB15_35 ; RV64-NEXT: .LBB15_34: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 ; RV64-NEXT: .LBB15_35: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 20(sp) ; RV64-NEXT: bnez a0, .LBB15_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 ; RV64-NEXT: j .LBB15_38 ; RV64-NEXT: .LBB15_37: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 ; RV64-NEXT: .LBB15_38: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB15_40 ; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 ; RV64-NEXT: j .LBB15_41 ; RV64-NEXT: .LBB15_40: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 ; RV64-NEXT: .LBB15_41: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 12(sp) ; RV64-NEXT: bnez a0, .LBB15_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 ; RV64-NEXT: j .LBB15_44 ; RV64-NEXT: .LBB15_43: -; RV64-NEXT: vsetivli a1, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 2 ; RV64-NEXT: .LBB15_44: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsw ft0, 8(sp) ; RV64-NEXT: bnez a0, .LBB15_46 ; RV64-NEXT: # %bb.45: -; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 1 ; RV64-NEXT: j .LBB15_47 ; RV64-NEXT: .LBB15_46: -; RV64-NEXT: vsetivli a0, 1, e32,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 ; RV64-NEXT: .LBB15_47: ; RV64-NEXT: vfmv.f.s ft0, v28 @@ -2606,14 +2288,11 @@ ; CHECK-NEXT: vfmv.v.f v25, ft0 ; CHECK-NEXT: bnez a0, .LBB17_5 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v9 ; CHECK-NEXT: j .LBB17_6 ; CHECK-NEXT: .LBB17_5: -; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu ; CHECK-NEXT: vfmv.f.s ft0, v8 ; CHECK-NEXT: .LBB17_6: -; CHECK-NEXT: vsetivli a0, 2, e64,m1,ta,mu ; CHECK-NEXT: vfmv.s.f v25, ft0 ; CHECK-NEXT: vmv1r.v v8, v25 ; CHECK-NEXT: ret @@ -2657,22 +2336,18 @@ ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB18_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; RV32-NEXT: j .LBB18_8 ; RV32-NEXT: .LBB18_7: -; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, 
v8, 2 ; RV32-NEXT: .LBB18_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB18_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB18_11 ; RV32-NEXT: .LBB18_10: -; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB18_11: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -2719,22 +2394,18 @@ ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB18_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; RV64-NEXT: j .LBB18_8 ; RV64-NEXT: .LBB18_7: -; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB18_8: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB18_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB18_11 ; RV64-NEXT: .LBB18_10: -; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 ; RV64-NEXT: .LBB18_11: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -2786,22 +2457,18 @@ ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB19_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 2 ; RV32-NEXT: j .LBB19_8 ; RV32-NEXT: .LBB19_7: -; RV32-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 2 ; RV32-NEXT: .LBB19_8: ; RV32-NEXT: vfmv.f.s ft0, v26 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB19_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v10, 1 ; RV32-NEXT: j .LBB19_11 ; RV32-NEXT: .LBB19_10: -; RV32-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV32-NEXT: vslidedown.vi v26, v8, 1 ; RV32-NEXT: .LBB19_11: ; RV32-NEXT: vfmv.f.s ft0, v26 @@ -2849,22 +2516,18 @@ ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB19_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 2 ; RV64-NEXT: j .LBB19_8 ; RV64-NEXT: .LBB19_7: -; RV64-NEXT: vsetivli a1, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 2 ; RV64-NEXT: .LBB19_8: ; RV64-NEXT: vfmv.f.s ft0, v26 ; RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB19_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v10, 1 ; RV64-NEXT: j .LBB19_11 ; RV64-NEXT: .LBB19_10: -; RV64-NEXT: vsetivli a0, 1, e64,m2,ta,mu ; RV64-NEXT: vslidedown.vi v26, v8, 1 ; RV64-NEXT: .LBB19_11: ; RV64-NEXT: vfmv.f.s ft0, v26 @@ -2916,66 +2579,54 @@ ; RV32-NEXT: fsd ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB20_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 ; RV32-NEXT: j .LBB20_8 ; RV32-NEXT: .LBB20_7: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 6 ; RV32-NEXT: .LBB20_8: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB20_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 ; RV32-NEXT: j .LBB20_11 ; RV32-NEXT: .LBB20_10: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 ; RV32-NEXT: .LBB20_11: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB20_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 ; RV32-NEXT: j .LBB20_14 ; RV32-NEXT: .LBB20_13: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; 
RV32-NEXT: vslidedown.vi v28, v8, 4 ; RV32-NEXT: .LBB20_14: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB20_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 ; RV32-NEXT: j .LBB20_17 ; RV32-NEXT: .LBB20_16: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 ; RV32-NEXT: .LBB20_17: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB20_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 ; RV32-NEXT: j .LBB20_20 ; RV32-NEXT: .LBB20_19: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 2 ; RV32-NEXT: .LBB20_20: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB20_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 1 ; RV32-NEXT: j .LBB20_23 ; RV32-NEXT: .LBB20_22: -; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 ; RV32-NEXT: .LBB20_23: ; RV32-NEXT: vfmv.f.s ft0, v28 @@ -3022,66 +2673,54 @@ ; RV64-NEXT: fsd ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB20_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 ; RV64-NEXT: j .LBB20_8 ; RV64-NEXT: .LBB20_7: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 ; RV64-NEXT: .LBB20_8: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB20_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 ; RV64-NEXT: j .LBB20_11 ; RV64-NEXT: .LBB20_10: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 ; RV64-NEXT: .LBB20_11: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB20_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 ; RV64-NEXT: j .LBB20_14 ; RV64-NEXT: .LBB20_13: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 ; RV64-NEXT: .LBB20_14: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB20_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 ; RV64-NEXT: j .LBB20_17 ; RV64-NEXT: .LBB20_16: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 ; RV64-NEXT: .LBB20_17: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB20_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 ; RV64-NEXT: j .LBB20_20 ; RV64-NEXT: .LBB20_19: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 2 ; RV64-NEXT: .LBB20_20: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB20_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 1 ; RV64-NEXT: j .LBB20_23 ; RV64-NEXT: .LBB20_22: -; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 ; RV64-NEXT: .LBB20_23: ; RV64-NEXT: vfmv.f.s ft0, v28 @@ -3133,66 +2772,54 @@ ; RV32-NEXT: fsd ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB21_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 6 ; RV32-NEXT: j .LBB21_8 ; RV32-NEXT: .LBB21_7: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; 
RV32-NEXT: vslidedown.vi v28, v8, 6 ; RV32-NEXT: .LBB21_8: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB21_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 5 ; RV32-NEXT: j .LBB21_11 ; RV32-NEXT: .LBB21_10: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 5 ; RV32-NEXT: .LBB21_11: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB21_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 4 ; RV32-NEXT: j .LBB21_14 ; RV32-NEXT: .LBB21_13: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 4 ; RV32-NEXT: .LBB21_14: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB21_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 3 ; RV32-NEXT: j .LBB21_17 ; RV32-NEXT: .LBB21_16: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 3 ; RV32-NEXT: .LBB21_17: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB21_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 2 ; RV32-NEXT: j .LBB21_20 ; RV32-NEXT: .LBB21_19: -; RV32-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 2 ; RV32-NEXT: .LBB21_20: ; RV32-NEXT: vfmv.f.s ft0, v28 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB21_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v12, 1 ; RV32-NEXT: j .LBB21_23 ; RV32-NEXT: .LBB21_22: -; RV32-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV32-NEXT: vslidedown.vi v28, v8, 1 ; RV32-NEXT: .LBB21_23: ; RV32-NEXT: vfmv.f.s ft0, v28 @@ -3240,66 +2867,54 @@ ; RV64-NEXT: fsd ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB21_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 6 ; RV64-NEXT: j .LBB21_8 ; RV64-NEXT: .LBB21_7: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 6 ; RV64-NEXT: .LBB21_8: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB21_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 5 ; RV64-NEXT: j .LBB21_11 ; RV64-NEXT: .LBB21_10: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 5 ; RV64-NEXT: .LBB21_11: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB21_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 4 ; RV64-NEXT: j .LBB21_14 ; RV64-NEXT: .LBB21_13: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 4 ; RV64-NEXT: .LBB21_14: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB21_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 3 ; RV64-NEXT: j .LBB21_17 ; RV64-NEXT: .LBB21_16: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 3 ; RV64-NEXT: .LBB21_17: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB21_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 2 ; RV64-NEXT: j .LBB21_20 ; RV64-NEXT: .LBB21_19: -; RV64-NEXT: vsetivli a1, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi 
v28, v8, 2 ; RV64-NEXT: .LBB21_20: ; RV64-NEXT: vfmv.f.s ft0, v28 ; RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB21_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v12, 1 ; RV64-NEXT: j .LBB21_23 ; RV64-NEXT: .LBB21_22: -; RV64-NEXT: vsetivli a0, 1, e64,m4,ta,mu ; RV64-NEXT: vslidedown.vi v28, v8, 1 ; RV64-NEXT: .LBB21_23: ; RV64-NEXT: vfmv.f.s ft0, v28 @@ -3351,154 +2966,126 @@ ; RV32-NEXT: fsd ft0, 120(sp) ; RV32-NEXT: bnez a0, .LBB22_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 14 ; RV32-NEXT: j .LBB22_8 ; RV32-NEXT: .LBB22_7: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 14 ; RV32-NEXT: .LBB22_8: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 112(sp) ; RV32-NEXT: bnez a0, .LBB22_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 13 ; RV32-NEXT: j .LBB22_11 ; RV32-NEXT: .LBB22_10: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 13 ; RV32-NEXT: .LBB22_11: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 104(sp) ; RV32-NEXT: bnez a0, .LBB22_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 12 ; RV32-NEXT: j .LBB22_14 ; RV32-NEXT: .LBB22_13: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 12 ; RV32-NEXT: .LBB22_14: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 96(sp) ; RV32-NEXT: bnez a0, .LBB22_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 11 ; RV32-NEXT: j .LBB22_17 ; RV32-NEXT: .LBB22_16: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 11 ; RV32-NEXT: .LBB22_17: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 88(sp) ; RV32-NEXT: bnez a0, .LBB22_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 10 ; RV32-NEXT: j .LBB22_20 ; RV32-NEXT: .LBB22_19: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 10 ; RV32-NEXT: .LBB22_20: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 80(sp) ; RV32-NEXT: bnez a0, .LBB22_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 9 ; RV32-NEXT: j .LBB22_23 ; RV32-NEXT: .LBB22_22: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 9 ; RV32-NEXT: .LBB22_23: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 72(sp) ; RV32-NEXT: bnez a0, .LBB22_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 8 ; RV32-NEXT: j .LBB22_26 ; RV32-NEXT: .LBB22_25: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 8 ; RV32-NEXT: .LBB22_26: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 64(sp) ; RV32-NEXT: bnez a0, .LBB22_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 7 ; RV32-NEXT: j .LBB22_29 ; RV32-NEXT: .LBB22_28: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 7 ; RV32-NEXT: .LBB22_29: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB22_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 6 ; RV32-NEXT: j .LBB22_32 ; RV32-NEXT: .LBB22_31: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 6 
; RV32-NEXT: .LBB22_32: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB22_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 5 ; RV32-NEXT: j .LBB22_35 ; RV32-NEXT: .LBB22_34: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 5 ; RV32-NEXT: .LBB22_35: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB22_37 ; RV32-NEXT: # %bb.36: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 4 ; RV32-NEXT: j .LBB22_38 ; RV32-NEXT: .LBB22_37: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 4 ; RV32-NEXT: .LBB22_38: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB22_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 3 ; RV32-NEXT: j .LBB22_41 ; RV32-NEXT: .LBB22_40: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 3 ; RV32-NEXT: .LBB22_41: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB22_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 2 ; RV32-NEXT: j .LBB22_44 ; RV32-NEXT: .LBB22_43: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 2 ; RV32-NEXT: .LBB22_44: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB22_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v8, v16, 1 ; RV32-NEXT: j .LBB22_47 ; RV32-NEXT: .LBB22_46: -; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: .LBB22_47: ; RV32-NEXT: vfmv.f.s ft0, v8 @@ -3545,154 +3132,126 @@ ; RV64-NEXT: fsd ft0, 120(sp) ; RV64-NEXT: bnez a0, .LBB22_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 14 ; RV64-NEXT: j .LBB22_8 ; RV64-NEXT: .LBB22_7: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 14 ; RV64-NEXT: .LBB22_8: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 112(sp) ; RV64-NEXT: bnez a0, .LBB22_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 13 ; RV64-NEXT: j .LBB22_11 ; RV64-NEXT: .LBB22_10: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 13 ; RV64-NEXT: .LBB22_11: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 104(sp) ; RV64-NEXT: bnez a0, .LBB22_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 12 ; RV64-NEXT: j .LBB22_14 ; RV64-NEXT: .LBB22_13: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 12 ; RV64-NEXT: .LBB22_14: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 96(sp) ; RV64-NEXT: bnez a0, .LBB22_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 11 ; RV64-NEXT: j .LBB22_17 ; RV64-NEXT: .LBB22_16: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 11 ; RV64-NEXT: .LBB22_17: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 88(sp) ; RV64-NEXT: bnez a0, .LBB22_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 10 ; RV64-NEXT: j .LBB22_20 ; RV64-NEXT: .LBB22_19: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 10 ; RV64-NEXT: 
.LBB22_20: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 80(sp) ; RV64-NEXT: bnez a0, .LBB22_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 9 ; RV64-NEXT: j .LBB22_23 ; RV64-NEXT: .LBB22_22: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 9 ; RV64-NEXT: .LBB22_23: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 72(sp) ; RV64-NEXT: bnez a0, .LBB22_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 8 ; RV64-NEXT: j .LBB22_26 ; RV64-NEXT: .LBB22_25: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 8 ; RV64-NEXT: .LBB22_26: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 64(sp) ; RV64-NEXT: bnez a0, .LBB22_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 7 ; RV64-NEXT: j .LBB22_29 ; RV64-NEXT: .LBB22_28: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 7 ; RV64-NEXT: .LBB22_29: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB22_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 6 ; RV64-NEXT: j .LBB22_32 ; RV64-NEXT: .LBB22_31: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 6 ; RV64-NEXT: .LBB22_32: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB22_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 5 ; RV64-NEXT: j .LBB22_35 ; RV64-NEXT: .LBB22_34: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 5 ; RV64-NEXT: .LBB22_35: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB22_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 4 ; RV64-NEXT: j .LBB22_38 ; RV64-NEXT: .LBB22_37: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 4 ; RV64-NEXT: .LBB22_38: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB22_40 ; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 3 ; RV64-NEXT: j .LBB22_41 ; RV64-NEXT: .LBB22_40: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 3 ; RV64-NEXT: .LBB22_41: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB22_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 2 ; RV64-NEXT: j .LBB22_44 ; RV64-NEXT: .LBB22_43: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 2 ; RV64-NEXT: .LBB22_44: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB22_46 ; RV64-NEXT: # %bb.45: -; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v16, 1 ; RV64-NEXT: j .LBB22_47 ; RV64-NEXT: .LBB22_46: -; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v8, 1 ; RV64-NEXT: .LBB22_47: ; RV64-NEXT: vfmv.f.s ft0, v8 @@ -3744,154 +3303,126 @@ ; RV32-NEXT: fsd ft0, 120(sp) ; RV32-NEXT: bnez a0, .LBB23_7 ; RV32-NEXT: # %bb.6: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 14 ; RV32-NEXT: j .LBB23_8 ; RV32-NEXT: .LBB23_7: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 14 ; RV32-NEXT: .LBB23_8: ; RV32-NEXT: 
vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 112(sp) ; RV32-NEXT: bnez a0, .LBB23_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 13 ; RV32-NEXT: j .LBB23_11 ; RV32-NEXT: .LBB23_10: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 13 ; RV32-NEXT: .LBB23_11: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 104(sp) ; RV32-NEXT: bnez a0, .LBB23_13 ; RV32-NEXT: # %bb.12: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 12 ; RV32-NEXT: j .LBB23_14 ; RV32-NEXT: .LBB23_13: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 12 ; RV32-NEXT: .LBB23_14: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 96(sp) ; RV32-NEXT: bnez a0, .LBB23_16 ; RV32-NEXT: # %bb.15: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 11 ; RV32-NEXT: j .LBB23_17 ; RV32-NEXT: .LBB23_16: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 11 ; RV32-NEXT: .LBB23_17: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 88(sp) ; RV32-NEXT: bnez a0, .LBB23_19 ; RV32-NEXT: # %bb.18: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 10 ; RV32-NEXT: j .LBB23_20 ; RV32-NEXT: .LBB23_19: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 10 ; RV32-NEXT: .LBB23_20: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 80(sp) ; RV32-NEXT: bnez a0, .LBB23_22 ; RV32-NEXT: # %bb.21: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 9 ; RV32-NEXT: j .LBB23_23 ; RV32-NEXT: .LBB23_22: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 9 ; RV32-NEXT: .LBB23_23: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 72(sp) ; RV32-NEXT: bnez a0, .LBB23_25 ; RV32-NEXT: # %bb.24: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 8 ; RV32-NEXT: j .LBB23_26 ; RV32-NEXT: .LBB23_25: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 8 ; RV32-NEXT: .LBB23_26: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 64(sp) ; RV32-NEXT: bnez a0, .LBB23_28 ; RV32-NEXT: # %bb.27: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 7 ; RV32-NEXT: j .LBB23_29 ; RV32-NEXT: .LBB23_28: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 7 ; RV32-NEXT: .LBB23_29: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 56(sp) ; RV32-NEXT: bnez a0, .LBB23_31 ; RV32-NEXT: # %bb.30: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 6 ; RV32-NEXT: j .LBB23_32 ; RV32-NEXT: .LBB23_31: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 6 ; RV32-NEXT: .LBB23_32: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 48(sp) ; RV32-NEXT: bnez a0, .LBB23_34 ; RV32-NEXT: # %bb.33: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 5 ; RV32-NEXT: j .LBB23_35 ; RV32-NEXT: .LBB23_34: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 5 ; RV32-NEXT: .LBB23_35: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 40(sp) ; RV32-NEXT: bnez a0, .LBB23_37 ; RV32-NEXT: # %bb.36: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 4 ; RV32-NEXT: j .LBB23_38 ; RV32-NEXT: .LBB23_37: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 4 ; RV32-NEXT: .LBB23_38: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd 
ft0, 32(sp) ; RV32-NEXT: bnez a0, .LBB23_40 ; RV32-NEXT: # %bb.39: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 3 ; RV32-NEXT: j .LBB23_41 ; RV32-NEXT: .LBB23_40: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 3 ; RV32-NEXT: .LBB23_41: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 24(sp) ; RV32-NEXT: bnez a0, .LBB23_43 ; RV32-NEXT: # %bb.42: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v16, 2 ; RV32-NEXT: j .LBB23_44 ; RV32-NEXT: .LBB23_43: -; RV32-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v24, v8, 2 ; RV32-NEXT: .LBB23_44: ; RV32-NEXT: vfmv.f.s ft0, v24 ; RV32-NEXT: fsd ft0, 16(sp) ; RV32-NEXT: bnez a0, .LBB23_46 ; RV32-NEXT: # %bb.45: -; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v8, v16, 1 ; RV32-NEXT: j .LBB23_47 ; RV32-NEXT: .LBB23_46: -; RV32-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: .LBB23_47: ; RV32-NEXT: vfmv.f.s ft0, v8 @@ -3939,154 +3470,126 @@ ; RV64-NEXT: fsd ft0, 120(sp) ; RV64-NEXT: bnez a0, .LBB23_7 ; RV64-NEXT: # %bb.6: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 14 ; RV64-NEXT: j .LBB23_8 ; RV64-NEXT: .LBB23_7: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 14 ; RV64-NEXT: .LBB23_8: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 112(sp) ; RV64-NEXT: bnez a0, .LBB23_10 ; RV64-NEXT: # %bb.9: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 13 ; RV64-NEXT: j .LBB23_11 ; RV64-NEXT: .LBB23_10: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 13 ; RV64-NEXT: .LBB23_11: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 104(sp) ; RV64-NEXT: bnez a0, .LBB23_13 ; RV64-NEXT: # %bb.12: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 12 ; RV64-NEXT: j .LBB23_14 ; RV64-NEXT: .LBB23_13: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 12 ; RV64-NEXT: .LBB23_14: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 96(sp) ; RV64-NEXT: bnez a0, .LBB23_16 ; RV64-NEXT: # %bb.15: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 11 ; RV64-NEXT: j .LBB23_17 ; RV64-NEXT: .LBB23_16: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 11 ; RV64-NEXT: .LBB23_17: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 88(sp) ; RV64-NEXT: bnez a0, .LBB23_19 ; RV64-NEXT: # %bb.18: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 10 ; RV64-NEXT: j .LBB23_20 ; RV64-NEXT: .LBB23_19: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 10 ; RV64-NEXT: .LBB23_20: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 80(sp) ; RV64-NEXT: bnez a0, .LBB23_22 ; RV64-NEXT: # %bb.21: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 9 ; RV64-NEXT: j .LBB23_23 ; RV64-NEXT: .LBB23_22: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 9 ; RV64-NEXT: .LBB23_23: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 72(sp) ; RV64-NEXT: bnez a0, .LBB23_25 ; RV64-NEXT: # %bb.24: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 8 ; RV64-NEXT: j .LBB23_26 ; RV64-NEXT: .LBB23_25: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 8 ; RV64-NEXT: .LBB23_26: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 64(sp) ; 
RV64-NEXT: bnez a0, .LBB23_28 ; RV64-NEXT: # %bb.27: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 7 ; RV64-NEXT: j .LBB23_29 ; RV64-NEXT: .LBB23_28: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 7 ; RV64-NEXT: .LBB23_29: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 56(sp) ; RV64-NEXT: bnez a0, .LBB23_31 ; RV64-NEXT: # %bb.30: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 6 ; RV64-NEXT: j .LBB23_32 ; RV64-NEXT: .LBB23_31: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 6 ; RV64-NEXT: .LBB23_32: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 48(sp) ; RV64-NEXT: bnez a0, .LBB23_34 ; RV64-NEXT: # %bb.33: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 5 ; RV64-NEXT: j .LBB23_35 ; RV64-NEXT: .LBB23_34: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 5 ; RV64-NEXT: .LBB23_35: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 40(sp) ; RV64-NEXT: bnez a0, .LBB23_37 ; RV64-NEXT: # %bb.36: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 4 ; RV64-NEXT: j .LBB23_38 ; RV64-NEXT: .LBB23_37: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 4 ; RV64-NEXT: .LBB23_38: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 32(sp) ; RV64-NEXT: bnez a0, .LBB23_40 ; RV64-NEXT: # %bb.39: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 3 ; RV64-NEXT: j .LBB23_41 ; RV64-NEXT: .LBB23_40: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 3 ; RV64-NEXT: .LBB23_41: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 24(sp) ; RV64-NEXT: bnez a0, .LBB23_43 ; RV64-NEXT: # %bb.42: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v16, 2 ; RV64-NEXT: j .LBB23_44 ; RV64-NEXT: .LBB23_43: -; RV64-NEXT: vsetivli a1, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v24, v8, 2 ; RV64-NEXT: .LBB23_44: ; RV64-NEXT: vfmv.f.s ft0, v24 ; RV64-NEXT: fsd ft0, 16(sp) ; RV64-NEXT: bnez a0, .LBB23_46 ; RV64-NEXT: # %bb.45: -; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v16, 1 ; RV64-NEXT: j .LBB23_47 ; RV64-NEXT: .LBB23_46: -; RV64-NEXT: vsetivli a0, 1, e64,m8,ta,mu ; RV64-NEXT: vslidedown.vi v8, v8, 1 ; RV64-NEXT: .LBB23_47: ; RV64-NEXT: vfmv.f.s ft0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -0,0 +1,447 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+experimental-v \ +; RUN: -verify-machineinstrs -O2 < %s | FileCheck %s + +; The following tests check whether inserting VSETVLI avoids inserting +; unneeded vsetvlis across basic blocks. 
+
+declare i64 @llvm.riscv.vsetvli(i64, i64, i64)
+
+declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)
+declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64)
+
+declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)
+
+declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, i64)
+
+declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double, i64)
+declare <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float, i64)
+
+declare void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>* nocapture, i64)
+declare void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>* nocapture, i64)
+
+define <vscale x 1 x double> @test1(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    beqz a1, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    vfadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: # %if.else
+; CHECK-NEXT:    vfsub.vv v8, v8, v9
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
+  %tobool = icmp eq i8 %cond, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
+  ret <vscale x 1 x double> %c.0
+}
+
+@scratch = global i8 0, align 16
+
+define <vscale x 1 x double> @test2(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    beqz a1, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    vfadd.vv v25, v8, v9
+; CHECK-NEXT:    vfmul.vv v8, v25, v8
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: # %if.else
+; CHECK-NEXT:    vfsub.vv v25, v8, v9
+; CHECK-NEXT:    vfmul.vv v8, v25, v8
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
+  %tobool = icmp eq i8 %cond, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
+  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %0)
+  ret <vscale x 1 x double> %3
+}
+
+; FIXME: The last vsetvli is redundant, but we need to look through a phi to
+; prove it.
+define <vscale x 1 x double> @test3(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
+; CHECK-LABEL: test3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    beqz a1, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    vfadd.vv v25, v8, v9
+; CHECK-NEXT:    j .LBB2_3
+; CHECK-NEXT:  .LBB2_2: # %if.else
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    vfsub.vv v25, v8, v9
+; CHECK-NEXT:  .LBB2_3: # %if.end
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    vfmul.vv v8, v25, v8
+; CHECK-NEXT:    ret
+entry:
+  %tobool = icmp eq i8 %cond, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
+  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %2 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
+  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %2)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %vl.0 = phi i64 [ %0, %if.then ], [ %2, %if.else ]
+  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %3, %if.else ]
+  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %vl.0)
+  ret <vscale x 1 x double> %4
+}
+
+define <vscale x 1 x double> @test4(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %l, <vscale x 1 x double> %r) nounwind {
+; CHECK-LABEL: test4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    beqz a1, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    lui a1, %hi(.LCPI3_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI3_0)
+; CHECK-NEXT:    vsetvli a2, a0, e64,m1,ta,mu
+; CHECK-NEXT:    vlse64.v v25, (a1), zero
+; CHECK-NEXT:    lui a1, %hi(.LCPI3_1)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI3_1)
+; CHECK-NEXT:    vlse64.v v26, (a1), zero
+; CHECK-NEXT:    vfadd.vv v25, v25, v26
+; CHECK-NEXT:    lui a1, %hi(scratch)
+; CHECK-NEXT:    addi a1, a1, %lo(scratch)
+; CHECK-NEXT:    vse64.v v25, (a1)
+; CHECK-NEXT:    j .LBB3_3
+; CHECK-NEXT:  .LBB3_2: # %if.else
+; CHECK-NEXT:    lui a1, %hi(.LCPI3_2)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI3_2)
+; CHECK-NEXT:    vsetvli a2, a0, e32,m1,ta,mu
+; CHECK-NEXT:    vlse32.v v25, (a1), zero
+; CHECK-NEXT:    lui a1, %hi(.LCPI3_3)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI3_3)
+; CHECK-NEXT:    vlse32.v v26, (a1), zero
+; CHECK-NEXT:    vfadd.vv v25, v25, v26
+; CHECK-NEXT:    lui a1, %hi(scratch)
+; CHECK-NEXT:    addi a1, a1, %lo(scratch)
+; CHECK-NEXT:    vse32.v v25, (a1)
+; CHECK-NEXT:  .LBB3_3: # %if.end
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    vfmul.vv v8, v8, v9
+; CHECK-NEXT:    ret
+entry:
+  %tobool = icmp eq i8 %cond, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 1.000000e+00, i64 %avl)
+  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 2.000000e+00, i64 %avl)
+  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %avl)
+  %3 = bitcast i8* @scratch to <vscale x 1 x double>*
+  tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %2, <vscale x 1 x double>* %3, i64 %avl)
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %4 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 1.000000e+00, i64 %avl)
+  %5 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 2.000000e+00, i64 %avl)
+  %6 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> %4, <vscale x 2 x float> %5, i64 %avl)
+  %7 = bitcast i8* @scratch to <vscale x 2 x float>*
+  tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %6, <vscale x 2 x float>* %7, i64 %avl)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %8 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %l, <vscale x 1 x double> %r, i64 %avl)
+  ret <vscale x 1 x double> %8
+}
+
+define <vscale x 1 x double> @test5(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
+; CHECK-LABEL: test5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi a2, a1, 1
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    bnez a2, .LBB4_3
+; CHECK-NEXT:  # %bb.1: # %if.else
+; CHECK-NEXT:    vfsub.vv v25, v8, v9
+; CHECK-NEXT:    andi a0, a1, 2
+; CHECK-NEXT:    beqz a0, .LBB4_4
+; CHECK-NEXT:  .LBB4_2: # %if.then4
+; CHECK-NEXT:    vfmul.vv v8, v25, v8
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB4_3: # %if.then
+; CHECK-NEXT:    vfadd.vv v25, v8, v9
+; CHECK-NEXT:    andi a0, a1, 2
+; CHECK-NEXT:    bnez a0, .LBB4_2
+; CHECK-NEXT:  .LBB4_4: # %if.else5
+; CHECK-NEXT:    vfmul.vv v8, v8, v25
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
+  %conv = zext i8 %cond to i32
+  %and = and i32 %conv, 1
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
+  %and2 = and i32 %conv, 2
+  %tobool3 = icmp eq i32 %and2, 0
+  br i1 %tobool3, label %if.else5, label %if.then4
+
+if.then4:                                         ; preds = %if.end
+  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %0)
+  br label %if.end6
+
+if.else5:                                         ; preds = %if.end
+  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %c.0, i64 %0)
+  br label %if.end6
+
+if.end6:                                          ; preds = %if.else5, %if.then4
+  %c.1 = phi <vscale x 1 x double> [ %3, %if.then4 ], [ %4, %if.else5 ]
+  ret <vscale x 1 x double> %c.1
+}
+
+; FIXME: The explicit vsetvli in if.then4 could be removed as it is redundant
+; with the one in the entry, but we lack the ability to remove explicit
+; vsetvli instructions.
+define <vscale x 1 x double> @test6(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
+; CHECK-LABEL: test6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    andi a3, a1, 1
+; CHECK-NEXT:    vsetvli a2, a0, e64,m1,ta,mu
+; CHECK-NEXT:    bnez a3, .LBB5_3
+; CHECK-NEXT:  # %bb.1: # %if.else
+; CHECK-NEXT:    vfsub.vv v25, v8, v9
+; CHECK-NEXT:    andi a1, a1, 2
+; CHECK-NEXT:    beqz a1, .LBB5_4
+; CHECK-NEXT:  .LBB5_2: # %if.then4
+; CHECK-NEXT:    vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
+; CHECK-NEXT:    vlse64.v v26, (a0), zero
+; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_1)
+; CHECK-NEXT:    vlse64.v v27, (a0), zero
+; CHECK-NEXT:    vfadd.vv v26, v26, v27
+; CHECK-NEXT:    lui a0, %hi(scratch)
+; CHECK-NEXT:    addi a0, a0, %lo(scratch)
+; CHECK-NEXT:    vse64.v v26, (a0)
+; CHECK-NEXT:    j .LBB5_5
+; CHECK-NEXT:  .LBB5_3: # %if.then
+; CHECK-NEXT:    vfadd.vv v25, v8, v9
+; CHECK-NEXT:    andi a1, a1, 2
+; CHECK-NEXT:    bnez a1, .LBB5_2
+; CHECK-NEXT:  .LBB5_4: # %if.else5
+; CHECK-NEXT:    vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT:    lui a0, %hi(.LCPI5_2)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_2)
+; CHECK-NEXT:    vlse32.v v26, (a0), zero
+; CHECK-NEXT:    lui a0, %hi(.LCPI5_3)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_3)
+; CHECK-NEXT:    vlse32.v v27, (a0), zero
+; CHECK-NEXT:    vfadd.vv v26, v26, v27
+; CHECK-NEXT:    lui a0, %hi(scratch)
+; CHECK-NEXT:    addi a0, a0, %lo(scratch)
+; CHECK-NEXT:    vse32.v v26, (a0)
+; CHECK-NEXT:  .LBB5_5: # %if.end10
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    vfmul.vv v8, v25, v25
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
+  %conv = zext i8 %cond to i32
+  %and = and i32 %conv, 1
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
+  %and2 = and i32 %conv, 2
+  %tobool3 = icmp eq i32 %and2, 0
+  br i1 %tobool3, label %if.else5, label %if.then4
+
+if.then4:                                         ; preds = %if.end
+  %3 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
+  %4 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 1.000000e+00, i64 %3)
+  %5 = tail call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64.f64(double 2.000000e+00, i64 %3)
+  %6 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %4, <vscale x 1 x double> %5, i64 %3)
+  %7 = bitcast i8* @scratch to <vscale x 1 x double>*
+  tail call void @llvm.riscv.vse.nxv1f64(<vscale x 1 x double> %6, <vscale x 1 x double>* %7, i64 %3)
+  br label %if.end10
+
+if.else5:                                         ; preds = %if.end
+  %8 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 2, i64 0)
+  %9 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 1.000000e+00, i64 %8)
+  %10 = tail call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32.f32(float 2.000000e+00, i64 %8)
+  %11 = tail call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(<vscale x 2 x float> %9, <vscale x 2 x float> %10, i64 %8)
+  %12 = bitcast i8* @scratch to <vscale x 2 x float>*
+  tail call void @llvm.riscv.vse.nxv2f32(<vscale x 2 x float> %11, <vscale x 2 x float>* %12, i64 %8)
+  br label %if.end10
+
+if.end10:                                         ; preds = %if.else5, %if.then4
+  %13 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %c.0, i64 %0)
+  ret <vscale x 1 x double> %13
+}
+
+declare void @foo()
+
+; Similar to test1, but contains a call to @foo to act as a barrier to analyzing
+; VL/VTYPE.
+define <vscale x 1 x double> @test8(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
+; CHECK-LABEL: test8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -32
+; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    vsetvli s0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    beqz a1, .LBB6_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    vfadd.vv v8, v8, v9
+; CHECK-NEXT:    j .LBB6_3
+; CHECK-NEXT:  .LBB6_2: # %if.else
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, a0, sp
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs1r.v v9, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    call foo@plt
+; CHECK-NEXT:    vsetvli a0, s0, e64,m1,ta,mu
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, a0, sp
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfsub.vv v8, v26, v25
+; CHECK-NEXT:  .LBB6_3: # %if.then
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
+  %tobool = icmp eq i8 %cond, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  call void @foo()
+  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
+  ret <vscale x 1 x double> %c.0
+}
+
+; Similar to test2, but contains a call to @foo to act as a barrier to analyzing
+; VL/VTYPE.
+define <vscale x 1 x double> @test9(i64 %avl, i8 zeroext %cond, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
+; CHECK-LABEL: test9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -32
+; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    vsetvli s0, a0, e64,m1,ta,mu
+; CHECK-NEXT:    beqz a1, .LBB7_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    vfadd.vv v25, v8, v9
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, a0, sp
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    call foo@plt
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, a0, sp
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    j .LBB7_3
+; CHECK-NEXT:  .LBB7_2: # %if.else
+; CHECK-NEXT:    vfsub.vv v25, v8, v9
+; CHECK-NEXT:  .LBB7_3: # %if.end
+; CHECK-NEXT:    vsetvli a0, s0, e64,m1,ta,mu
+; CHECK-NEXT:    vfmul.vv v8, v25, v8
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.riscv.vsetvli(i64 %avl, i64 3, i64 0)
+  %tobool = icmp eq i8 %cond, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %1 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  call void @foo()
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %2 = tail call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 %0)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %c.0 = phi <vscale x 1 x double> [ %1, %if.then ], [ %2, %if.else ]
+  %3 = tail call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(<vscale x 1 x double> %c.0, <vscale x 1 x double> %a, i64 %0)
+  ret <vscale x 1 x double> %3
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -0,0 +1,415 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \
+# RUN:     -run-pass=riscv-insert-vsetvli | FileCheck %s
+
+--- |
+  ; ModuleID = 'vsetvli-insert.ll'
+  source_filename = "vsetvli-insert.ll"
+  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+  target triple = "riscv64"
+
+  define <vscale x 1 x i64> @load_add_or_sub(i8 zeroext %cond, <vscale x 1 x i64>* %0, <vscale x 1 x i64> %1, i64 %2) #0 {
+  entry:
+    %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* %0, i64 %2)
+    %tobool = icmp eq i8 %cond, 0
+    br i1 %tobool, label %if.else, label %if.then
+
+  if.then:                                          ; preds = %entry
+    %b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %1, i64 %2)
+    br label %if.end
+
+  if.else:                                          ; preds = %entry
+    %c = call <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %1, i64 %2)
+    br label %if.end
+
+  if.end:                                           ; preds = %if.else, %if.then
+    %d = phi <vscale x 1 x i64> [ %b, %if.then ], [ %c, %if.else ]
+    ret <vscale x 1 x i64> %d
+  }
+
+  define void @load_zext_or_sext(i8 zeroext %cond, <vscale x 1 x i32>* %0, <vscale x 1 x i64>* %1, i64 %2) #0 {
+  entry:
+    %a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* %0, i64 %2)
+    %tobool = icmp eq i8 %cond, 0
+    br i1 %tobool, label %if.else, label %if.then
+
+  if.then:                                          ; preds = %entry
+    %b = call <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32> %a, i64 %2)
+    br label %if.end
+
+  if.else:                                          ; preds = %entry
+    %c = call <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32> %a, i64 %2)
+    br label %if.end
+
+  if.end:                                           ; preds = %if.else, %if.then
+    %d = phi <vscale x 1 x i64> [ %b, %if.then ], [ %c, %if.else ]
+    call void @llvm.riscv.vse.nxv1i64.i64(<vscale x 1 x i64> %d, <vscale x 1 x i64>* %1, i64 %2)
+    ret void
+  }
+
+  ; Function Attrs: nounwind readnone
+  declare i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64>) #1
+
+  define i64 @vmv_x_s(i8 zeroext %cond, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2) #0 {
+  entry:
+    %tobool = icmp eq i8 %cond, 0
+    br i1 %tobool, label %if.else, label %if.then
+
+  if.then:                                          ; preds = %entry
+    %a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2)
+    br label %if.end
+
+  if.else:                                          ; preds = %entry
+    %b = call <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %1, <vscale x 1 x i64> %1, i64 %2)
+    br label %if.end
+
+  if.end:                                           ; preds = %if.else, %if.then
+    %c = phi <vscale x 1 x i64> [ %a, %if.then ], [ %b, %if.else ]
+    %d = call i64 @llvm.riscv.vmv.x.s.nxv1i64(<vscale x 1 x i64> %c)
+    ret i64 %d
+  }
+
+  ; Function Attrs: nounwind
+  declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #2
+
+  define <vscale x 1 x i64> @vsetvli_add_or_sub(i8 zeroext %cond, <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %avl) #0 {
+  entry:
+    %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 3, i64 0)
+    %tobool = icmp eq i8 %cond, 0
+    br i1 %tobool, label %if.else, label %if.then
+
+  if.then:                                          ; preds = %entry
+    %b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %vl)
+    br label %if.end
+
+  if.else:                                          ; preds = %entry
+    %c = call <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %vl)
+    br label %if.end
+
+  if.end:                                           ; preds = %if.else, %if.then
+    %d = phi <vscale x 1 x i64> [ %b, %if.then ], [ %c, %if.else ]
+    ret <vscale x 1 x i64> %d
+  }
+
+  ; Function Attrs: nounwind readnone
+  declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
+
+  ; Function Attrs: nounwind readnone
+  declare <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
+
+  ; Function Attrs: nounwind readonly
+  declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* nocapture, i64) #3
+
+  ; Function Attrs: nounwind readonly
+  declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* nocapture, i64) #3
+
+  ; Function Attrs: nounwind writeonly
+  declare void @llvm.riscv.vse.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>* nocapture, i64) #4
+
+  ; Function Attrs: nounwind readnone
+  declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1
+
+  ; Function Attrs: nounwind readnone
+  declare <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1
+
+  attributes #0 = { "target-features"="+experimental-v" }
+  attributes #1 = { nounwind readnone }
+  attributes #2 = { nounwind }
+  attributes #3 = { nounwind readonly }
+  attributes #4 = { nounwind writeonly }
+
+...
+--- +name: load_add_or_sub +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: vr } + - { id: 3, class: vr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: vr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%4' } + - { reg: '$x11', virtual-reg: '%5' } + - { reg: '$v8', virtual-reg: '%6' } + - { reg: '$x12', virtual-reg: '%7' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: load_add_or_sub + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $x10, $x11, $v8, $x12 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x12 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v8 + ; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %9:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2 + ; CHECK: PseudoBR %bb.1 + ; CHECK: bb.1.if.then: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoBR %bb.3 + ; CHECK: bb.2.if.else: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: bb.3.if.end: + ; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2 + ; CHECK: $v8 = COPY [[PHI]] + ; CHECK: PseudoRET implicit $v8 + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $x10, $x11, $v8, $x12 + + %7:gpr = COPY $x12 + %6:vr = COPY $v8 + %5:gpr = COPY $x11 + %4:gpr = COPY $x10 + %0:vr = PseudoVLE64_V_M1 %5, %7, 6 + %8:gpr = COPY $x0 + BEQ %4, %8, %bb.2 + PseudoBR %bb.1 + + bb.1.if.then: + %1:vr = PseudoVADD_VV_M1 %0, %6, %7, 6 + PseudoBR %bb.3 + + bb.2.if.else: + %2:vr = PseudoVSUB_VV_M1 %0, %6, %7, 6 + + bb.3.if.end: + %3:vr = PHI %1, %bb.1, %2, %bb.2 + $v8 = COPY %3 + PseudoRET implicit $v8 + +... 
+--- +name: load_zext_or_sext +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: vr } + - { id: 3, class: vr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%4' } + - { reg: '$x11', virtual-reg: '%5' } + - { reg: '$x12', virtual-reg: '%6' } + - { reg: '$x13', virtual-reg: '%7' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: load_zext_or_sext + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $x10, $x11, $x12, $x13 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; CHECK: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: dead %9:gpr = PseudoVSETVLI [[COPY]], 87, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 [[COPY2]], $noreg, 5, implicit $vl, implicit $vtype + ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2 + ; CHECK: PseudoBR %bb.1 + ; CHECK: bb.1.if.then: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: dead %10:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoBR %bb.3 + ; CHECK: bb.2.if.else: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: dead %11:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: early-clobber %2:vr = PseudoVSEXT_VF2_M1 [[PseudoVLE32_V_MF2_]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: bb.3.if.end: + ; CHECK: [[PHI:%[0-9]+]]:vr = PHI %1, %bb.1, %2, %bb.2 + ; CHECK: PseudoVSE64_V_M1 [[PHI]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoRET + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $x10, $x11, $x12, $x13 + + %7:gpr = COPY $x13 + %6:gpr = COPY $x12 + %5:gpr = COPY $x11 + %4:gpr = COPY $x10 + %0:vr = PseudoVLE32_V_MF2 %5, %7, 5 + %8:gpr = COPY $x0 + BEQ %4, %8, %bb.2 + PseudoBR %bb.1 + + bb.1.if.then: + early-clobber %1:vr = PseudoVZEXT_VF2_M1 %0, %7, 6 + PseudoBR %bb.3 + + bb.2.if.else: + early-clobber %2:vr = PseudoVSEXT_VF2_M1 %0, %7, 6 + + bb.3.if.end: + %3:vr = PHI %1, %bb.1, %2, %bb.2 + PseudoVSE64_V_M1 %3, %6, %7, 6 + PseudoRET + +... 
+--- +name: vmv_x_s +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vr } + - { id: 1, class: vr } + - { id: 2, class: vr } + - { id: 3, class: gpr } + - { id: 4, class: vr } + - { id: 5, class: vr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%3' } + - { reg: '$v8', virtual-reg: '%4' } + - { reg: '$v9', virtual-reg: '%5' } + - { reg: '$x11', virtual-reg: '%6' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: vmv_x_s + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $x10, $v8, $v9, $x11 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9 + ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8 + ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2 + ; CHECK: PseudoBR %bb.1 + ; CHECK: bb.1.if.then: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: dead %9:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoBR %bb.3 + ; CHECK: bb.2.if.else: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: dead %10:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY1]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: bb.3.if.end: + ; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2 + ; CHECK: [[PseudoVMV_X_S_M1_:%[0-9]+]]:gpr = PseudoVMV_X_S_M1 [[PHI]], 6, implicit $vtype + ; CHECK: $x10 = COPY [[PseudoVMV_X_S_M1_]] + ; CHECK: PseudoRET implicit $x10 + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $x10, $v8, $v9, $x11 + + %6:gpr = COPY $x11 + %5:vr = COPY $v9 + %4:vr = COPY $v8 + %3:gpr = COPY $x10 + %7:gpr = COPY $x0 + BEQ %3, %7, %bb.2 + PseudoBR %bb.1 + + bb.1.if.then: + %0:vr = PseudoVADD_VV_M1 %4, %5, %6, 6 + PseudoBR %bb.3 + + bb.2.if.else: + %1:vr = PseudoVSUB_VV_M1 %5, %5, %6, 6 + + bb.3.if.end: + %2:vr = PHI %0, %bb.1, %1, %bb.2 + %8:gpr = PseudoVMV_X_S_M1 %2, 6 + $x10 = COPY %8 + PseudoRET implicit $x10 + +... 
+--- +name: vsetvli_add_or_sub +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: vr } + - { id: 2, class: vr } + - { id: 3, class: vr } + - { id: 4, class: gpr } + - { id: 5, class: vr } + - { id: 6, class: vr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%4' } + - { reg: '$v8', virtual-reg: '%5' } + - { reg: '$v9', virtual-reg: '%6' } + - { reg: '$x11', virtual-reg: '%7' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: vsetvli_add_or_sub + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.2(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $x10, $v8, $v9, $x11 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK: [[COPY1:%[0-9]+]]:vr = COPY $v9 + ; CHECK: [[COPY2:%[0-9]+]]:vr = COPY $v8 + ; CHECK: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI [[COPY]], 88, implicit-def $vl, implicit-def $vtype + ; CHECK: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; CHECK: BEQ [[COPY3]], [[COPY4]], %bb.2 + ; CHECK: PseudoBR %bb.1 + ; CHECK: bb.1.if.then: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: PseudoBR %bb.3 + ; CHECK: bb.2.if.else: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6, implicit $vl, implicit $vtype + ; CHECK: bb.3.if.end: + ; CHECK: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2 + ; CHECK: $v8 = COPY [[PHI]] + ; CHECK: PseudoRET implicit $v8 + bb.0.entry: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $x10, $v8, $v9, $x11 + + %7:gpr = COPY $x11 + %6:vr = COPY $v9 + %5:vr = COPY $v8 + %4:gpr = COPY $x10 + %0:gpr = PseudoVSETVLI %7, 88, implicit-def dead $vl, implicit-def dead $vtype + %8:gpr = COPY $x0 + BEQ %4, %8, %bb.2 + PseudoBR %bb.1 + + bb.1.if.then: + %1:vr = PseudoVADD_VV_M1 %5, %6, %0, 6 + PseudoBR %bb.3 + + bb.2.if.else: + %2:vr = PseudoVSUB_VV_M1 %5, %6, %0, 6 + + bb.3.if.end: + %3:vr = PHI %1, %bb.1, %2, %bb.2 + $v8 = COPY %3 + PseudoRET implicit $v8 + +...