diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -143,7 +143,13 @@
   bool VLAny = false;
   // Only zero vs non-zero is used. If demanded, can change non-zero values.
   bool VLZeroness = false;
-  bool SEW = false;
+  // What properties of SEW we need to preserve.
+  enum : uint8_t {
+    SEWEqual = 2,              // The exact value of SEW needs to be preserved.
+    SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
+                               // than or equal to the original value.
+    SEWNone = 0                // We don't need to preserve SEW at all.
+  } SEW = SEWNone;
   bool LMUL = false;
   bool SEWLMULRatio = false;
   bool TailPolicy = false;
@@ -151,7 +157,7 @@
 
   // Return true if any part of VTYPE was used
   bool usedVTYPE() const {
-    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
+    return SEW != SEWNone || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
   }
 
   // Return true if any property of VL was used
@@ -161,7 +167,7 @@
 
   // Mark all VTYPE subfields and properties as demanded
   void demandVTYPE() {
-    SEW = true;
+    SEW = SEWEqual;
     LMUL = true;
     SEWLMULRatio = true;
     TailPolicy = true;
@@ -204,41 +210,44 @@
 }
 #endif
 
-
-/// Return true if the two values of the VTYPE register provided are
-/// indistinguishable from the perspective of an instruction (or set of
-/// instructions) which use only the Used subfields and properties.
-static bool areCompatibleVTYPEs(uint64_t VType1,
-                                uint64_t VType2,
+/// Return true if moving from CurVType to NewVType is
+/// indistinguishable from the perspective of an instruction (or set
+/// of instructions) which use only the Used subfields and properties.
+static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                 const DemandedFields &Used) {
-  if (Used.SEW &&
-      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
+  if (Used.SEW == DemandedFields::SEWEqual &&
+      RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
+    return false;
+
+  if (Used.SEW == DemandedFields::SEWGreaterThanOrEqual &&
+      RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
     return false;
 
   if (Used.LMUL &&
-      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
+      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
     return false;
 
   if (Used.SEWLMULRatio) {
-    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType1),
-                                              RISCVVType::getVLMUL(VType1));
-    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType2),
-                                              RISCVVType::getVLMUL(VType2));
+    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
+                                              RISCVVType::getVLMUL(CurVType));
+    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
+                                              RISCVVType::getVLMUL(NewVType));
     if (Ratio1 != Ratio2)
       return false;
   }
 
-  if (Used.TailPolicy &&
-      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
+  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
+                             RISCVVType::isTailAgnostic(NewVType))
     return false;
-  if (Used.MaskPolicy &&
-      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
+  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
+                             RISCVVType::isMaskAgnostic(NewVType))
     return false;
   return true;
 }
 
 /// Return the fields and properties demanded by the provided instruction.
-static DemandedFields getDemanded(const MachineInstr &MI) {
+DemandedFields getDemanded(const MachineInstr &MI,
+                           const MachineRegisterInfo *MRI) {
   // Warning: This function has to work on both the lowered (i.e. post
   // emitVSETVLIs) and pre-lowering forms. The main implication of this is
   // that it can't use the value of a SEW, VL, or Policy operand as they might
@@ -270,7 +279,7 @@
   // Note: We assume that the instructions initial SEW is the EEW encoded
   // in the opcode. This is asserted when constructing the VSETVLIInfo.
   if (getEEWForLoadStore(MI)) {
-    Res.SEW = false;
+    Res.SEW = DemandedFields::SEWNone;
     Res.LMUL = false;
   }
 
@@ -285,7 +294,7 @@
   // * Probably ok if available VLMax is larger than demanded
   // * The policy bits can probably be ignored..
   if (isMaskRegOp(MI)) {
-    Res.SEW = false;
+    Res.SEW = DemandedFields::SEWNone;
     Res.LMUL = false;
   }
 
@@ -294,6 +303,17 @@
     Res.LMUL = false;
     Res.SEWLMULRatio = false;
     Res.VLAny = false;
+    // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we don't
+    // need to preserve any other bits and are thus compatible with any larger,
+    // etype and can disregard policy bits. Warning: It's tempting to try doing
+    // this for any tail agnostic operation, but we can't as TA requires
+    // tail lanes to either be the original value or -1. We are writing
+    // unknown bits to the lanes here.
+    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
+    if (VRegDef && VRegDef->isImplicitDef()) {
+      Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
+      Res.TailPolicy = false;
+    }
   }
 
   return Res;
@@ -856,22 +876,7 @@
   if (!CurInfo.isValid() || CurInfo.isUnknown() ||
       CurInfo.hasSEWLMULRatioOnly())
     return true;
-  DemandedFields Used = getDemanded(MI);
-
-  if (isScalarMoveInstr(MI)) {
-    // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we don't
-    // need to preserve any other bits and are thus compatible with any larger,
-    // etype and can disregard policy bits. Warning: It's tempting to try doing
-    // this for any tail agnostic operation, but we can't as TA requires
-    // tail lanes to either be the original value or -1. We are writing
-    // unknown bits to the lanes here.
-    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
-    if (VRegDef && VRegDef->isImplicitDef() &&
-        CurInfo.getSEW() >= Require.getSEW()) {
-      Used.SEW = false;
-      Used.TailPolicy = false;
-    }
-  }
+  DemandedFields Used = getDemanded(MI, MRI);
 
   // A slidedown/slideup with an IMPLICIT_DEF merge op can freely clobber
   // elements not copied from the source vector (e.g. masked off, tail, or
@@ -1307,7 +1312,7 @@
 static void doUnion(DemandedFields &A, DemandedFields B) {
   A.VLAny |= B.VLAny;
   A.VLZeroness |= B.VLZeroness;
-  A.SEW |= B.SEW;
+  A.SEW = std::max(A.SEW, B.SEW);
   A.LMUL |= B.LMUL;
   A.SEWLMULRatio |= B.SEWLMULRatio;
   A.TailPolicy |= B.TailPolicy;
@@ -1377,7 +1382,7 @@
 
   for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
     if (!isVectorConfigInstr(MI)) {
-      doUnion(Used, getDemanded(MI));
+      doUnion(Used, getDemanded(MI, MRI));
       continue;
     }
 
@@ -1405,7 +1410,7 @@
       }
     }
     NextMI = &MI;
-    Used = getDemanded(MI);
+    Used = getDemanded(MI, MRI);
   }
 
   for (auto *MI : ToDelete)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
@@ -39,14 +39,12 @@
 define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
 ; RV32-LABEL: trn1.v16i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
 ; RV32-NEXT: vid.v v11
 ; RV32-NEXT: vrgather.vv v10, v8, v11
 ; RV32-NEXT: lui a0, 11
 ; RV32-NEXT: addi a0, a0, -1366
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
 ; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
 ; RV32-NEXT: vadd.vi v8, v11, -1
 ; RV32-NEXT: vrgather.vv v10, v9, v8, v0.t
 ; RV32-NEXT: vmv.v.v v8, v10
@@ -54,14 +52,12 @@
 ;
 ; RV64-LABEL: trn1.v16i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
 ; RV64-NEXT: vid.v v11
 ; RV64-NEXT: vrgather.vv v10, v8, v11
 ; RV64-NEXT: lui a0, 11
 ; RV64-NEXT: addiw a0, a0, -1366
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
 ; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
 ; RV64-NEXT: vadd.vi v8, v11, -1
 ; RV64-NEXT: vrgather.vv v10, v9, v8, v0.t
 ; RV64-NEXT: vmv.v.v v8, v10
@@ -73,30 +69,26 @@
 define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
 ; RV32-LABEL: trn2.v16i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
 ; RV32-NEXT: vid.v v11
 ; RV32-NEXT: vadd.vi v12, v11, 1
-; RV32-NEXT: vrgather.vv v10, v8, v12
 ; RV32-NEXT: lui a0, 11
 ; RV32-NEXT: addi a0, a0, -1366
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
 ; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT: vrgather.vv v10, v8, v12
 ; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t
 ; RV32-NEXT: vmv.v.v v8, v10
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: trn2.v16i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
 ; RV64-NEXT: vid.v v11
 ; RV64-NEXT: vadd.vi v12, v11, 1
-; RV64-NEXT: vrgather.vv v10, v8, v12
 ; RV64-NEXT: lui a0, 11
 ; RV64-NEXT: addiw a0, a0, -1366
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
 ; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT: vrgather.vv v10, v8, v12
 ; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t
 ; RV64-NEXT: vmv.v.v v8, v10
 ; RV64-NEXT: ret
@@ -107,10 +99,11 @@
 define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
 ; CHECK-LABEL: trn1.v4i16:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vid.v v11
 ; CHECK-NEXT: vrgather.vv v10, v8, v11
 ; CHECK-NEXT: li a0, 10
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vadd.vi
v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t @@ -123,12 +116,13 @@ define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: trn2.v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -139,10 +133,11 @@ define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: trn1.v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t @@ -155,12 +150,13 @@ define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: trn2.v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -185,9 +181,8 @@ ; CHECK-LABEL: trn2.v2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 2 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -199,10 +194,11 @@ define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: trn1.v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 10 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t @@ -215,12 +211,13 @@ define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: trn2.v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 10 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -242,9 +239,8 @@ ; CHECK-LABEL: trn2.v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 2 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -270,9 +266,8 @@ ; CHECK-LABEL: trn2.v2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 2 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -284,10 +279,11 @@ define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: trn1.v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 10 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t @@ -300,12 +296,13 @@ define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: trn2.v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 10 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -327,9 +324,8 @@ ; CHECK-LABEL: trn2.v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 2 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -341,10 +337,11 @@ define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: trn1.v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t @@ -357,12 +354,13 @@ define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: trn2.v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 10 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -373,10 +371,11 @@ define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: trn1.v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t @@ -389,12 +388,13 @@ define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: trn2.v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 170 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv 
v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1400,9 +1400,8 @@ ; RV32-NEXT: vand.vx v11, v11, a4, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t ; RV32-NEXT: li a5, 5 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vmv.v.i v13, 0 ; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vmerge.vxm v13, v13, a5, v0 @@ -1525,9 +1524,8 @@ ; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vsrl.vi v10, v8, 8 ; RV32-NEXT: li a4, 5 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vmv.v.i v11, 0 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v11, v11, a4, v0 @@ -1660,9 +1658,8 @@ ; RV32-NEXT: vand.vx v14, v14, a4, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: li a5, 85 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vmv.v.i v18, 0 ; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vmerge.vxm v18, v18, a5, v0 @@ -1785,9 +1782,8 @@ ; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vsrl.vi v12, v8, 8 ; RV32-NEXT: li a4, 85 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vmv.v.i v14, 0 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v14, v14, a4, v0 @@ -1921,9 +1917,8 @@ ; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t ; RV32-NEXT: lui a5, 5 ; RV32-NEXT: addi a5, a5, 1365 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vmv.v.i v20, 0 ; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 @@ -2047,9 +2042,8 @@ ; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: lui a4, 5 ; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -177,6 +177,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v10, v10, a4 ; RV32-NEXT: li a5, 5 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v11, 0 @@ -703,8 +704,8 @@ ; LMULMAX2-RV32-NEXT: lui a4, 4080 ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4 ; LMULMAX2-RV32-NEXT: li a5, 85 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5 ; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 ; LMULMAX2-RV32-NEXT: lui a5, 1044480 ; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a5, v0 @@ -831,6 +832,7 @@ ; LMULMAX1-RV32-NEXT: lui a5, 4080 ; LMULMAX1-RV32-NEXT: vand.vx v12, v9, a5 
; LMULMAX1-RV32-NEXT: li a6, 5 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; LMULMAX1-RV32-NEXT: vmv.s.x v0, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -440,9 +440,8 @@ ; RV32-NEXT: vand.vx v11, v11, a4, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t ; RV32-NEXT: li a5, 5 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vmv.v.i v13, 0 ; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vmerge.vxm v13, v13, a5, v0 @@ -514,9 +513,8 @@ ; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vsrl.vi v10, v8, 8 ; RV32-NEXT: li a4, 5 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vmv.v.i v11, 0 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v11, v11, a4, v0 @@ -598,9 +596,8 @@ ; RV32-NEXT: vand.vx v14, v14, a4, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: li a5, 85 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vmv.v.i v18, 0 ; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vmerge.vxm v18, v18, a5, v0 @@ -672,9 +669,8 @@ ; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vsrl.vi v12, v8, 8 ; RV32-NEXT: li a4, 85 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vmv.v.i v14, 0 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v14, v14, a4, v0 @@ -757,9 +753,8 @@ ; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t ; RV32-NEXT: lui a5, 5 ; RV32-NEXT: addi a5, a5, 1365 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vmv.v.i v20, 0 ; RV32-NEXT: lui a5, 1044480 ; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 @@ -832,9 +827,8 @@ ; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: lui a4, 5 ; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -83,6 +83,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v10, v10, a4 ; RV32-NEXT: li a5, 5 +; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v11, 0 @@ -330,8 +331,8 @@ ; LMULMAX2-RV32-NEXT: lui a4, 4080 ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4 ; LMULMAX2-RV32-NEXT: li a5, 85 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5 ; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 ; LMULMAX2-RV32-NEXT: lui a5, 1044480 ; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a5, v0 @@ -407,6 +408,7 @@ ; 
LMULMAX1-RV32-NEXT: lui a5, 4080 ; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5 ; LMULMAX1-RV32-NEXT: li a6, 5 +; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; LMULMAX1-RV32-NEXT: vmv.s.x v0, a6 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v12, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -36,9 +36,8 @@ ; LMULMAX1-LABEL: hang_when_merging_stores_after_legalization: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: li a0, 2 -; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; LMULMAX1-NEXT: vmv.s.x v0, a0 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; LMULMAX1-NEXT: vmv.s.x v0, a0 ; LMULMAX1-NEXT: vrgather.vi v12, v8, 0 ; LMULMAX1-NEXT: vrgather.vi v12, v9, 3, v0.t ; LMULMAX1-NEXT: vsetivli zero, 3, e32, m1, tu, ma @@ -50,14 +49,15 @@ ; ; LMULMAX2-LABEL: hang_when_merging_stores_after_legalization: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vid.v v12 ; LMULMAX2-NEXT: li a0, 7 ; LMULMAX2-NEXT: vmul.vx v14, v12, a0 ; LMULMAX2-NEXT: vrgather.vv v12, v8, v14 +; LMULMAX2-NEXT: vadd.vi v8, v14, -14 ; LMULMAX2-NEXT: li a0, 12 +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vmv.s.x v0, a0 -; LMULMAX2-NEXT: vadd.vi v8, v14, -14 ; LMULMAX2-NEXT: vrgather.vv v12, v10, v8, v0.t ; LMULMAX2-NEXT: vmv1r.v v8, v12 ; LMULMAX2-NEXT: ret @@ -152,9 +152,8 @@ ; CHECK-LABEL: buildvec_merge0_v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 6 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vfmv.v.f v8, fa0 ; CHECK-NEXT: lui a1, 262144 ; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -42,9 +42,10 @@ ; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-V128-NEXT: vid.v v9 ; RV32-V128-NEXT: vsrl.vi v14, v9, 1 -; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v14 ; RV32-V128-NEXT: li a0, 10 +; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV32-V128-NEXT: vmv.s.x v0, a0 ; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v14, v0.t ; RV32-V128-NEXT: vmv.v.v v8, v10 @@ -53,11 +54,12 @@ ; RV64-V128-LABEL: interleave_v2f64: ; RV64-V128: # %bb.0: ; RV64-V128-NEXT: vmv1r.v v12, v9 -; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-V128-NEXT: vid.v v10 ; RV64-V128-NEXT: vsrl.vi v14, v10, 1 ; RV64-V128-NEXT: vrgather.vv v10, v8, v14 ; RV64-V128-NEXT: li a0, 10 +; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-V128-NEXT: vmv.s.x v0, a0 ; RV64-V128-NEXT: vrgather.vv v10, v12, v14, v0.t ; RV64-V128-NEXT: vmv.v.v v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -6,9 +6,8 @@ ; CHECK-LABEL: 
shuffle_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 11 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> @@ -19,9 +18,8 @@ ; CHECK-LABEL: shuffle_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 236 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> @@ -34,9 +32,8 @@ ; RV32-NEXT: li a0, 9 ; RV32-NEXT: lui a1, %hi(.LCPI2_0) ; RV32-NEXT: fld fa5, %lo(.LCPI2_0)(a1) -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: vfmerge.vfm v8, v8, fa5, v0 ; RV32-NEXT: ret ; @@ -45,9 +42,8 @@ ; RV64-NEXT: lui a0, %hi(.LCPI2_0) ; RV64-NEXT: fld fa5, %lo(.LCPI2_0)(a0) ; RV64-NEXT: li a0, 9 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vfmerge.vfm v8, v8, fa5, v0 ; RV64-NEXT: ret %s = shufflevector <4 x double> , <4 x double> %x, <4 x i32> @@ -60,9 +56,8 @@ ; RV32-NEXT: li a0, 6 ; RV32-NEXT: lui a1, %hi(.LCPI3_0) ; RV32-NEXT: fld fa5, %lo(.LCPI3_0)(a1) -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: vfmerge.vfm v8, v8, fa5, v0 ; RV32-NEXT: ret ; @@ -71,9 +66,8 @@ ; RV64-NEXT: lui a0, %hi(.LCPI3_0) ; RV64-NEXT: fld fa5, %lo(.LCPI3_0)(a0) ; RV64-NEXT: li a0, 6 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vfmerge.vfm v8, v8, fa5, v0 ; RV64-NEXT: ret %s = shufflevector <4 x double> %x, <4 x double> , <4 x i32> @@ -133,11 +127,12 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI6_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI6_0) -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vle16.v v14, (a0) +; RV32-NEXT: vrgatherei16.vv v12, v8, v14 ; RV32-NEXT: li a0, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgatherei16.vv v12, v8, v14 ; RV32-NEXT: vrgather.vi v12, v10, 1, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret @@ -146,11 +141,12 @@ ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI6_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI6_0) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v14, (a0) +; RV64-NEXT: vrgather.vv v12, v8, v14 ; RV64-NEXT: li a0, 8 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vrgather.vv v12, v8, v14 ; RV64-NEXT: vrgather.vi v12, v10, 1, v0.t ; RV64-NEXT: vmv.v.v v8, v12 ; RV64-NEXT: ret @@ -162,9 +158,8 @@ ; RV32-LABEL: vrgather_shuffle_xv_v4f64: ; RV32: # %bb.0: ; RV32-NEXT: li a0, 12 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: lui a0, %hi(.LCPI7_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0) ; RV32-NEXT: vlse64.v v10, (a0), zero @@ -177,13 +172,12 @@ ; ; 
RV64-LABEL: vrgather_shuffle_xv_v4f64: ; RV64: # %bb.0: -; RV64-NEXT: li a0, 12 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-NEXT: lui a0, %hi(.LCPI7_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0) ; RV64-NEXT: vlse64.v v10, (a0), zero +; RV64-NEXT: li a0, 12 +; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vid.v v12 ; RV64-NEXT: vrsub.vi v12, v12, 4 ; RV64-NEXT: vrgather.vv v10, v8, v12, v0.t @@ -197,28 +191,29 @@ ; RV32-LABEL: vrgather_shuffle_vx_v4f64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vid.v v12 +; RV32-NEXT: vid.v v10 ; RV32-NEXT: li a0, 3 +; RV32-NEXT: vmul.vx v12, v10, a0 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV32-NEXT: lui a1, %hi(.LCPI8_0) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_0) ; RV32-NEXT: vlse64.v v10, (a1), zero -; RV32-NEXT: vmul.vx v12, v12, a0 ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; RV32-NEXT: vrgatherei16.vv v10, v8, v12, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_vx_v4f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; RV64-NEXT: vid.v v12 -; RV64-NEXT: lui a0, %hi(.LCPI8_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI8_0) -; RV64-NEXT: vlse64.v v10, (a0), zero +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vid.v v10 ; RV64-NEXT: li a0, 3 +; RV64-NEXT: vmul.vx v12, v10, a0 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: lui a1, %hi(.LCPI8_0) +; RV64-NEXT: addi a1, a1, %lo(.LCPI8_0) +; RV64-NEXT: vlse64.v v10, (a1), zero ; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vmul.vx v12, v12, a0 ; RV64-NEXT: vrgather.vv v10, v8, v12, v0.t ; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -55,9 +55,10 @@ ; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-V128-NEXT: vid.v v9 ; RV32-V128-NEXT: vsrl.vi v14, v9, 1 -; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v14 ; RV32-V128-NEXT: li a0, 10 +; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV32-V128-NEXT: vmv.s.x v0, a0 ; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v14, v0.t ; RV32-V128-NEXT: vmv.v.v v8, v10 @@ -66,11 +67,12 @@ ; RV64-V128-LABEL: interleave_v2i64: ; RV64-V128: # %bb.0: ; RV64-V128-NEXT: vmv1r.v v12, v9 -; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-V128-NEXT: vid.v v10 ; RV64-V128-NEXT: vsrl.vi v14, v10, 1 ; RV64-V128-NEXT: vrgather.vv v10, v8, v14 ; RV64-V128-NEXT: li a0, 10 +; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-V128-NEXT: vmv.s.x v0, a0 ; RV64-V128-NEXT: vrgather.vv v10, v12, v14, v0.t ; RV64-V128-NEXT: vmv.v.v v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -6,9 +6,8 @@ ; CHECK-LABEL: shuffle_v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 11 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.s.x 
v0, a0 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> @@ -19,9 +18,8 @@ ; CHECK-LABEL: shuffle_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 203 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> @@ -32,9 +30,8 @@ ; CHECK-LABEL: shuffle_xv_v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 9 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmerge.vim v8, v8, 5, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x i16> , <4 x i16> %x, <4 x i32> @@ -45,9 +42,8 @@ ; CHECK-LABEL: shuffle_vx_v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 6 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmerge.vim v8, v8, 5, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x i16> %x, <4 x i16> , <4 x i32> @@ -87,11 +83,12 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v11, (a0) +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 8 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -103,9 +100,8 @@ ; CHECK-LABEL: vrgather_shuffle_xv_v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 12 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vi v10, v9, 4 ; CHECK-NEXT: vmv.v.i v9, 5 @@ -119,10 +115,11 @@ define <4 x i16> @vrgather_shuffle_vx_v4i16(<4 x i16> %x) { ; CHECK-LABEL: vrgather_shuffle_vx_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: li a0, 3 ; CHECK-NEXT: vmul.vx v10, v9, a0 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmv.v.i v9, 5 ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t @@ -185,32 +182,34 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v16, 5 -; RV32-NEXT: vmv.v.i v20, 2 -; RV32-NEXT: vslideup.vi v20, v16, 7 ; RV32-NEXT: lui a0, %hi(.LCPI11_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0) -; RV32-NEXT: vle16.v v21, (a0) -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vle16.v v20, (a0) +; RV32-NEXT: vmv.v.i v21, 2 +; RV32-NEXT: vslideup.vi v21, v16, 7 +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32-NEXT: vrgatherei16.vv v16, v8, v20 ; RV32-NEXT: li a0, 164 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgatherei16.vv v16, v8, v21 -; RV32-NEXT: vrgatherei16.vv v16, v12, v20, v0.t +; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_vv_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: li a0, 5 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; 
RV64-NEXT: vmv.s.x v16, a0 ; RV64-NEXT: vmv.v.i v20, 2 ; RV64-NEXT: lui a0, %hi(.LCPI11_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI11_0) ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vslideup.vi v20, v16, 7 +; RV64-NEXT: vrgather.vv v16, v8, v24 ; RV64-NEXT: li a0, 164 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vrgather.vv v16, v8, v24 ; RV64-NEXT: vrgather.vv v16, v12, v20, v0.t ; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: ret @@ -223,28 +222,28 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI12_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_0) -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vmv.v.i v20, -1 +; RV32-NEXT: vrgatherei16.vv v12, v20, v16 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV32-NEXT: lui a0, %hi(.LCPI12_1) ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) -; RV32-NEXT: vle16.v v17, (a0) +; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: li a0, 113 ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgatherei16.vv v12, v20, v16 -; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t +; RV32-NEXT: vrgatherei16.vv v12, v8, v16, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_xv_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: li a0, 113 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: lui a0, %hi(.LCPI12_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI12_0) -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: li a0, 113 +; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vrgather.vv v12, v8, v16, v0.t ; RV64-NEXT: vmv.v.v v8, v12 @@ -258,9 +257,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI13_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0) -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vrgatherei16.vv v12, v8, v16 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV32-NEXT: lui a0, %hi(.LCPI13_1) ; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1) ; RV32-NEXT: vle16.v v8, (a0) @@ -273,13 +273,12 @@ ; ; RV64-LABEL: vrgather_shuffle_vx_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: li a0, 115 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: lui a0, %hi(.LCPI13_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI13_0) -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: li a0, 115 +; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vmv.v.i v12, 5 ; RV64-NEXT: vrgather.vv v12, v8, v16, v0.t ; RV64-NEXT: vmv.v.v v8, v12 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -230,9 +230,8 @@ ; LMULMAX1-RV32-LABEL: splat_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: li a3, 5 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a3 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-RV32-NEXT: vmv.s.x v0, a3 ; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2 ; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 @@ -879,8 +878,8 @@ ; LMULMAX2-RV32-NEXT: addi a0, a0, 32 ; LMULMAX2-RV32-NEXT: vle64.v v14, (a0) ; LMULMAX2-RV32-NEXT: li a0, 85 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, 
a0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a0 ; LMULMAX2-RV32-NEXT: vmv.v.x v16, a2 ; LMULMAX2-RV32-NEXT: vmerge.vxm v16, v16, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma @@ -900,7 +899,7 @@ ; LMULMAX1-RV32-LABEL: vadd_vx_v16i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: addi a4, a0, 96 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; LMULMAX1-RV32-NEXT: vle64.v v8, (a4) ; LMULMAX1-RV32-NEXT: addi a4, a0, 112 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a4) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1357,94 +1357,78 @@ ; RV32-LABEL: mulhu_v16i8: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vle8.v v8, (a0) +; RV32-NEXT: vle8.v v9, (a0) ; RV32-NEXT: li a1, 513 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 4 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vmv.v.i v8, 4 +; RV32-NEXT: vmerge.vim v10, v8, 1, v0 ; RV32-NEXT: lui a1, 1 ; RV32-NEXT: addi a2, a1, 78 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.s.x v0, a2 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 3, v0 ; RV32-NEXT: lui a2, 8 ; RV32-NEXT: addi a2, a2, 304 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a2 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 2, v0 +; RV32-NEXT: vmv.s.x v8, a2 +; RV32-NEXT: vmerge.vim v10, v10, 3, v0 +; RV32-NEXT: vmv.v.v v0, v8 +; RV32-NEXT: vmerge.vim v10, v10, 2, v0 ; RV32-NEXT: lui a2, 3 ; RV32-NEXT: addi a2, a2, -2044 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.s.x v0, a2 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: vmv.v.i v11, 0 ; RV32-NEXT: li a2, -128 -; RV32-NEXT: vmerge.vxm v11, v10, a2, v0 ; RV32-NEXT: addi a1, a1, 32 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vmv.s.x v8, a1 ; RV32-NEXT: lui a1, %hi(.LCPI65_0) ; RV32-NEXT: addi a1, a1, %lo(.LCPI65_0) ; RV32-NEXT: vle8.v v12, (a1) -; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsrl.vv v10, v8, v10 -; RV32-NEXT: vmulhu.vv v10, v10, v12 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: vmulhu.vv v8, v8, v11 -; RV32-NEXT: vadd.vv v8, v8, v10 -; RV32-NEXT: vsrl.vv v8, v8, v9 +; RV32-NEXT: vmerge.vxm v13, v11, a2, v0 +; RV32-NEXT: vmv.v.v v0, v8 +; RV32-NEXT: vmerge.vim v8, v11, 1, v0 +; RV32-NEXT: vsrl.vv v8, v9, v8 +; RV32-NEXT: vmulhu.vv v8, v8, v12 +; RV32-NEXT: vsub.vv v9, v9, v8 +; RV32-NEXT: vmulhu.vv v9, v9, v13 +; RV32-NEXT: vadd.vv v8, v9, v8 +; RV32-NEXT: vsrl.vv v8, v8, v10 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: mulhu_v16i8: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vle8.v v8, (a0) +; RV64-NEXT: vle8.v v9, (a0) ; RV64-NEXT: li a1, 513 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, 4 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 +; RV64-NEXT: vmv.v.i v8, 4 +; RV64-NEXT: vmerge.vim v10, v8, 1, v0 ; RV64-NEXT: lui a1, 1 ; RV64-NEXT: addiw a2, a1, 78 -; 
RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 3, v0 ; RV64-NEXT: lui a2, 8 ; RV64-NEXT: addiw a2, a2, 304 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 2, v0 +; RV64-NEXT: vmv.s.x v8, a2 +; RV64-NEXT: vmerge.vim v10, v10, 3, v0 +; RV64-NEXT: vmv.v.v v0, v8 +; RV64-NEXT: vmerge.vim v10, v10, 2, v0 ; RV64-NEXT: lui a2, 3 ; RV64-NEXT: addiw a2, a2, -2044 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v10, 0 +; RV64-NEXT: vmv.v.i v11, 0 ; RV64-NEXT: li a2, -128 -; RV64-NEXT: vmerge.vxm v11, v10, a2, v0 ; RV64-NEXT: addiw a1, a1, 32 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vmv.s.x v8, a1 ; RV64-NEXT: lui a1, %hi(.LCPI65_0) ; RV64-NEXT: addi a1, a1, %lo(.LCPI65_0) ; RV64-NEXT: vle8.v v12, (a1) -; RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-NEXT: vsrl.vv v10, v8, v10 -; RV64-NEXT: vmulhu.vv v10, v10, v12 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vmulhu.vv v8, v8, v11 -; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: vsrl.vv v8, v8, v9 +; RV64-NEXT: vmerge.vxm v13, v11, a2, v0 +; RV64-NEXT: vmv.v.v v0, v8 +; RV64-NEXT: vmerge.vim v8, v11, 1, v0 +; RV64-NEXT: vsrl.vv v8, v9, v8 +; RV64-NEXT: vmulhu.vv v8, v8, v12 +; RV64-NEXT: vsub.vv v9, v9, v8 +; RV64-NEXT: vmulhu.vv v9, v9, v13 +; RV64-NEXT: vadd.vv v8, v9, v8 +; RV64-NEXT: vsrl.vv v8, v8, v10 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret %a = load <16 x i8>, ptr %x @@ -1477,6 +1461,7 @@ ; CHECK-NEXT: vmulhu.vv v8, v8, v10 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: li a1, 33 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vmv.v.i v9, 3 ; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 @@ -1628,9 +1613,7 @@ ; RV32-NEXT: vle8.v v8, (a0) ; RV32-NEXT: lui a1, 5 ; RV32-NEXT: addi a1, a1, -1452 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vmv.v.i v9, 7 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0 ; RV32-NEXT: li a1, -123 @@ -1648,9 +1631,7 @@ ; RV64-NEXT: vle8.v v8, (a0) ; RV64-NEXT: lui a1, 5 ; RV64-NEXT: addiw a1, a1, -1452 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vmv.v.i v9, 7 ; RV64-NEXT: vmerge.vim v9, v9, 1, v0 ; RV64-NEXT: li a1, -123 @@ -1673,6 +1654,7 @@ ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: li a1, 105 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: lui a1, 5 ; RV32-NEXT: addi a1, a1, -1755 @@ -1692,6 +1674,7 @@ ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: li a1, 105 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: lui a1, 5 ; RV64-NEXT: addiw a1, a1, -1755 @@ -1728,8 +1711,8 @@ ; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; RV32-NEXT: vslideup.vi v10, v9, 4 ; RV32-NEXT: li a1, 6 -; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: vmv.v.i v9, -7 ; RV32-NEXT: vmerge.vim v9, v9, 7, v0 ; RV32-NEXT: vdiv.vv v8, v8, v9 @@ -1754,6 +1737,7 @@ ; RV64-NEXT: vsetivli zero, 2, 
e16, mf4, ta, ma ; RV64-NEXT: vdiv.vv v9, v9, v10 ; RV64-NEXT: li a1, 6 +; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64-NEXT: vmv.v.i v10, -7 @@ -1780,6 +1764,7 @@ ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: li a1, 5 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: lui a1, 419430 ; RV32-NEXT: addi a1, a1, 1639 @@ -5476,48 +5461,39 @@ ; LMULMAX2-RV32-NEXT: li a1, 32 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: lui a2, 66049 -; LMULMAX2-RV32-NEXT: addi a2, a2, 32 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: lui a2, %hi(.LCPI181_0) -; LMULMAX2-RV32-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV32-NEXT: vle8.v v10, (a2) +; LMULMAX2-RV32-NEXT: lui a1, 66049 +; LMULMAX2-RV32-NEXT: addi a1, a1, 32 +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI181_0) +; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI181_0) +; LMULMAX2-RV32-NEXT: vle8.v v10, (a1) ; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV32-NEXT: vmerge.vim v14, v12, 1, v0 ; LMULMAX2-RV32-NEXT: vsrl.vv v14, v8, v14 ; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v14, v10 ; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a2, 163907 -; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: li a2, -128 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a2, v0 +; LMULMAX2-RV32-NEXT: lui a1, 163907 +; LMULMAX2-RV32-NEXT: addi a1, a1, -2044 +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: li a1, -128 +; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a1, v0 ; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a2, 8208 -; LMULMAX2-RV32-NEXT: addi a2, a2, 513 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v10, 4 -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV32-NEXT: lui a2, 66785 -; LMULMAX2-RV32-NEXT: addi a2, a2, 78 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 3, v0 -; LMULMAX2-RV32-NEXT: lui a2, 529160 -; LMULMAX2-RV32-NEXT: addi a2, a2, 304 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 2, v0 -; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10 +; LMULMAX2-RV32-NEXT: vadd.vv v10, v8, v10 +; LMULMAX2-RV32-NEXT: lui a1, 8208 +; LMULMAX2-RV32-NEXT: addi a1, a1, 513 +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: vmv.v.i v8, 4 +; LMULMAX2-RV32-NEXT: vmerge.vim v12, v8, 1, v0 +; LMULMAX2-RV32-NEXT: lui a1, 66785 +; LMULMAX2-RV32-NEXT: addi a1, a1, 78 +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: lui a1, 529160 +; LMULMAX2-RV32-NEXT: addi a1, a1, 304 +; LMULMAX2-RV32-NEXT: vmv.s.x v8, a1 +; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 3, v0 +; LMULMAX2-RV32-NEXT: vmv1r.v v0, 
v8 +; LMULMAX2-RV32-NEXT: vmerge.vim v8, v12, 2, v0 +; LMULMAX2-RV32-NEXT: vsrl.vv v8, v10, v8 ; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -5526,48 +5502,39 @@ ; LMULMAX2-RV64-NEXT: li a1, 32 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: lui a2, 66049 -; LMULMAX2-RV64-NEXT: addiw a2, a2, 32 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI181_0) -; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV64-NEXT: vle8.v v10, (a2) +; LMULMAX2-RV64-NEXT: lui a1, 66049 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 32 +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI181_0) +; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI181_0) +; LMULMAX2-RV64-NEXT: vle8.v v10, (a1) ; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV64-NEXT: vmerge.vim v14, v12, 1, v0 ; LMULMAX2-RV64-NEXT: vsrl.vv v14, v8, v14 ; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v14, v10 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a2, 163907 -; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: li a2, -128 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a2, v0 +; LMULMAX2-RV64-NEXT: lui a1, 163907 +; LMULMAX2-RV64-NEXT: addiw a1, a1, -2044 +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: li a1, -128 +; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a1, v0 ; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a2, 8208 -; LMULMAX2-RV64-NEXT: addiw a2, a2, 513 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 4 -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV64-NEXT: lui a2, 66785 -; LMULMAX2-RV64-NEXT: addiw a2, a2, 78 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 3, v0 -; LMULMAX2-RV64-NEXT: lui a2, 529160 -; LMULMAX2-RV64-NEXT: addiw a2, a2, 304 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 2, v0 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: vadd.vv v10, v8, v10 +; LMULMAX2-RV64-NEXT: lui a1, 8208 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 513 +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v8, 4 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v8, 1, v0 +; LMULMAX2-RV64-NEXT: lui a1, 66785 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 78 +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: lui a1, 529160 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 304 +; LMULMAX2-RV64-NEXT: vmv.s.x v8, a1 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 3, v0 +; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 +; LMULMAX2-RV64-NEXT: vmerge.vim v8, v12, 2, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -5684,6 +5651,7 @@ ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: li a1, 68 +; LMULMAX2-NEXT: 
vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vmv.s.x v0, a1 ; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0) ; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0) @@ -5696,6 +5664,7 @@ ; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: li a1, 136 +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vmv.s.x v0, a1 ; LMULMAX2-NEXT: vmv.v.i v10, 2 ; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 @@ -5768,6 +5737,7 @@ ; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v8, v10 ; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: lui a1, 524288 +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; LMULMAX2-RV32-NEXT: vmv.s.x v12, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 @@ -5885,15 +5855,13 @@ ; LMULMAX2-RV32-NEXT: li a1, 32 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a2, -123 -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a2 -; LMULMAX2-RV32-NEXT: lui a2, 304453 -; LMULMAX2-RV32-NEXT: addi a2, a2, -1452 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: li a2, 57 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX2-RV32-NEXT: li a1, -123 +; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32-NEXT: lui a1, 304453 +; LMULMAX2-RV32-NEXT: addi a1, a1, -1452 +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: li a1, 57 +; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 @@ -5906,15 +5874,13 @@ ; LMULMAX2-RV64-NEXT: li a1, 32 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a2, -123 -; LMULMAX2-RV64-NEXT: vmv.v.x v10, a2 -; LMULMAX2-RV64-NEXT: lui a2, 304453 -; LMULMAX2-RV64-NEXT: addiw a2, a2, -1452 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: li a2, 57 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX2-RV64-NEXT: li a1, -123 +; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV64-NEXT: lui a1, 304453 +; LMULMAX2-RV64-NEXT: addiw a1, a1, -1452 +; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: li a1, 57 +; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 @@ -5930,9 +5896,7 @@ ; LMULMAX1-RV32-NEXT: vle8.v v9, (a1) ; LMULMAX1-RV32-NEXT: lui a2, 5 ; LMULMAX1-RV32-NEXT: addi a2, a2, -1452 -; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v10, -9 ; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 9, v0 ; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 @@ -5949,9 +5913,7 @@ ; LMULMAX1-RV64-NEXT: vle8.v v9, (a1) ; LMULMAX1-RV64-NEXT: lui a2, 5 ; LMULMAX1-RV64-NEXT: addiw a2, a2, -1452 -; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX1-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-RV64-NEXT: vmv.v.i v10, -9 ; LMULMAX1-RV64-NEXT: vmerge.vim v10, v10, 9, v0 ; LMULMAX1-RV64-NEXT: vdivu.vv v9, v9, v10 @@ -6013,6 +5975,7 @@ ; LMULMAX1-NEXT: addi a1, a0, 16 ; 
LMULMAX1-NEXT: vle16.v v9, (a1) ; LMULMAX1-NEXT: li a2, 105 +; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; LMULMAX1-NEXT: vmv.s.x v0, a2 ; LMULMAX1-NEXT: vmv.v.i v10, 7 ; LMULMAX1-NEXT: vmerge.vim v10, v10, -7, v0 @@ -6033,6 +5996,7 @@ ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX2-RV32-NEXT: li a1, 85 +; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 419430 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1639 @@ -6070,6 +6034,7 @@ ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a1) ; LMULMAX1-RV32-NEXT: li a2, 5 +; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 ; LMULMAX1-RV32-NEXT: lui a2, 419430 ; LMULMAX1-RV32-NEXT: addi a2, a2, 1639 @@ -6118,18 +6083,18 @@ ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX2-RV32-NEXT: li a1, 17 +; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a2, a1, 1365 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v10, a2 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1366 ; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmulh.vv v10, v8, v10 ; LMULMAX2-RV32-NEXT: li a1, 51 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: vmv.v.i v12, -1 ; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 0, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma @@ -6137,8 +6102,8 @@ ; LMULMAX2-RV32-NEXT: li a1, 63 ; LMULMAX2-RV32-NEXT: vsrl.vx v8, v12, a1 ; LMULMAX2-RV32-NEXT: li a1, 68 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma @@ -6150,25 +6115,26 @@ ; LMULMAX2-RV64-LABEL: mulhs_v4i64: ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX2-RV64-NEXT: li a1, 5 +; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI188_0) +; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI188_0) +; LMULMAX2-RV64-NEXT: vlse64.v v10, (a2), zero +; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI188_1) +; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI188_1)(a2) ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI188_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI188_0) -; LMULMAX2-RV64-NEXT: vlse64.v v8, (a1), zero -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI188_1) -; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI188_1)(a1) -; LMULMAX2-RV64-NEXT: vle64.v v10, (a0) ; LMULMAX2-RV64-NEXT: vmv.v.i v12, -1 ; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 0, v0 -; LMULMAX2-RV64-NEXT: vmerge.vxm v8, v8, a1, v0 -; LMULMAX2-RV64-NEXT: vmulh.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vmacc.vv v8, v10, v12 +; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX2-RV64-NEXT: vmulh.vv v10, v8, v10 +; LMULMAX2-RV64-NEXT: vmacc.vv v10, v8, v12 ; LMULMAX2-RV64-NEXT: li a1, 63 -; LMULMAX2-RV64-NEXT: vsrl.vx v10, v8, a1 +; LMULMAX2-RV64-NEXT: vsrl.vx v8, v10, a1 ; LMULMAX2-RV64-NEXT: vmv.v.i v12, 1 ; LMULMAX2-RV64-NEXT: vmerge.vim v12, 
v12, 0, v0 -; LMULMAX2-RV64-NEXT: vsra.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: vsra.vv v10, v10, v12 +; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -17,8 +17,8 @@ ; CHECK-NEXT: vadd.vv v9, v8, v8 ; CHECK-NEXT: vrgather.vv v8, v10, v9 ; CHECK-NEXT: li a0, 4 -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v12, v10, 4 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vrgather.vi v8, v12, 0, v0.t @@ -132,10 +132,10 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 82 +; RV32-NEXT: li a3, 88 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 82 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 88 * vlenb ; RV32-NEXT: addi a3, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma @@ -143,38 +143,38 @@ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vid.v v16 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 37 +; RV32-NEXT: li a4, 44 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs4r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vadd.vi v8, v16, -4 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a4, a3, 4 -; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs4r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vrgather.vv v12, v24, v8 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 45 +; RV32-NEXT: li a4, 52 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs4r.v v12, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vadd.vi v8, v16, -10 ; RV32-NEXT: lui a3, 12 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a4, a3, 5 -; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: li a4, 36 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v16, v24, 16 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 57 +; RV32-NEXT: li a4, 56 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 @@ -182,14 +182,14 @@ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 41 +; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs4r.v v12, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a4, a3, 6 -; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: li a4, 80 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # 
Unknown-size Folded Spill @@ -202,46 +202,43 @@ ; RV32-NEXT: lui a6, 1 ; RV32-NEXT: vle32.v v8, (a4) ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a7, a4, 3 -; RV32-NEXT: add a4, a7, a4 +; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: vle32.v v24, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 73 -; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v8, (a5) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 25 +; RV32-NEXT: li a4, 28 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: addi a1, a6, -64 -; RV32-NEXT: vmv.s.x v24, a1 +; RV32-NEXT: vmv.s.x v16, a1 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs1r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 3 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v24, v16, v0 +; RV32-NEXT: vrgather.vv v16, v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 @@ -249,36 +246,36 @@ ; RV32-NEXT: vl1r.v v2, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 28 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v24, v8, v16, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v8, v24, 0 +; RV32-NEXT: vslideup.vi v8, v16, 0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 44 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vi v16, v12, -2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 +; RV32-NEXT: li a3, 56 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -286,15 +283,15 @@ ; RV32-NEXT: vrgather.vv v8, v24, v16 ; RV32-NEXT: vadd.vi v16, v12, -8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: 
add a1, a3, a1 +; RV32-NEXT: li a3, 36 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 80 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload @@ -305,39 +302,38 @@ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a3, %hi(.LCPI6_3) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3) -; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: vle32.v v24, (a1) ; RV32-NEXT: vle32.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 28 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v24, v8, v16 +; RV32-NEXT: vrgather.vv v16, v8, v24 ; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 28 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v24, v8, v16, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vslideup.vi v4, v24, 0 +; RV32-NEXT: vslideup.vi v4, v16, 0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 28 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -347,35 +343,34 @@ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 +; RV32-NEXT: li a3, 56 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v12, v16, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 44 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vi v8, v8, -6 +; RV32-NEXT: vadd.vi v4, v8, -6 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 3 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 80 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t +; RV32-NEXT: vrgather.vv v12, v16, v4, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 44 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, 
sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -385,43 +380,44 @@ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a3, %hi(.LCPI6_6) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6) -; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: vle32.v v24, (a1) ; RV32-NEXT: vle32.v v8, (a3) -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 960 -; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vmv.s.x v1, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v16, v24 +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v24, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 44 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vslideup.vi v12, v8, 0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 44 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -431,37 +427,32 @@ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 +; RV32-NEXT: li a3, 56 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v12, v24, v8 +; RV32-NEXT: vrgather.vv v28, v24, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 36 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 80 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v12, v24, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v28, v8, v12, v0.t +; 
RV32-NEXT: vmv.v.v v4, v28 ; RV32-NEXT: lui a1, %hi(.LCPI6_8) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_8) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu @@ -470,71 +461,77 @@ ; RV32-NEXT: vle32.v v24, (a1) ; RV32-NEXT: vle32.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 36 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v0, v24 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v16, v24 +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a3, 72 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 36 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma +; RV32-NEXT: vslideup.vi v4, v8, 0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 36 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v12, v8, 0 +; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v12, v8, v16 ; RV32-NEXT: lui a1, %hi(.LCPI6_10) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) +; RV32-NEXT: lui a3, 15 ; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: lui a1, 15 -; RV32-NEXT: vmv.s.x v1, a1 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 +; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 3 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 80 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v4, v16, v12 -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 
-; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v4, v16, v8, v0.t +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_11) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu @@ -543,56 +540,79 @@ ; RV32-NEXT: vle32.v v24, (a1) ; RV32-NEXT: vle32.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 1008 -; RV32-NEXT: vmv.s.x v2, a1 +; RV32-NEXT: vmv.s.x v16, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v0, v24 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v16, v24 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 56 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v12, v16, 0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 +; RV32-NEXT: li a3, 56 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t -; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vslideup.vi v4, v8, 0 +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_13) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 52 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 80 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded 
Reload ; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 52 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -602,38 +622,29 @@ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a2, %hi(.LCPI6_15) ; RV32-NEXT: addi a2, a2, %lo(.LCPI6_15) -; RV32-NEXT: vle32.v v16, (a1) -; RV32-NEXT: vle32.v v8, (a2) +; RV32-NEXT: vle32.v v24, (a1) +; RV32-NEXT: vle32.v v16, (a2) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 6 -; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v0, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 73 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v24, v16 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 49 +; RV32-NEXT: li a2, 72 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 6 -; RV32-NEXT: add a1, a2, a1 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV32-NEXT: vrgather.vv v8, v24, v16, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 45 +; RV32-NEXT: li a2, 52 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -643,18 +654,24 @@ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vse32.v v12, (a1) ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: vse32.v v4, (a1) +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 56 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 192 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a3, a2, 5 -; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: li a3, 36 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 44 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 @@ -662,21 +679,21 @@ ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 28 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 41 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 82 +; RV32-NEXT: li a1, 88 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 @@ -687,148 +704,155 @@ ; 
RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 90 +; RV64-NEXT: li a3, 94 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xda, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 90 * vlenb -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: addi a2, a1, 128 -; RV64-NEXT: vle64.v v16, (a2) +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xde, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 94 * vlenb +; RV64-NEXT: addi a2, a1, 256 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 6 -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: li a3, 85 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, a1, 128 +; RV64-NEXT: vle64.v v24, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 73 +; RV64-NEXT: li a3, 61 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 69 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vid.v v8 -; RV64-NEXT: li a2, 6 -; RV64-NEXT: vmul.vx v8, v8, a2 -; RV64-NEXT: vrgather.vv v24, v0, v8 +; RV64-NEXT: li a1, 6 +; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vrgather.vv v16, v0, v8 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 81 +; RV64-NEXT: li a3, 77 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 45 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vadd.vi v8, v8, -16 -; RV64-NEXT: vrgather.vv v24, v16, v8, v0.t -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 5 -; RV64-NEXT: add a2, a3, a2 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV64-NEXT: addi a1, a1, 256 -; RV64-NEXT: vle64.v v16, (a1) -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: li a1, 128 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vrgather.vi v8, v16, 4 -; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v16, 8 +; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 57 +; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: li a1, 128 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 85 
+; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v8, v24, 4 +; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 45 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vslidedown.vi v16, v24, 8 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t +; RV64-NEXT: vrgather.vi v8, v16, 2, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 49 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vslideup.vi v8, v16, 0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 81 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v8, v16, 1 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v8, v24, 1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 73 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v24, v0, v8 -; RV64-NEXT: vadd.vi v8, v16, -15 +; RV64-NEXT: vrgather.vv v16, v0, v8 +; RV64-NEXT: vadd.vi v8, v24, -15 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 6 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 61 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v24, v16, v8, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 57 +; RV64-NEXT: li a2, 85 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v16, 5 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v8, v24, 5 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li 
a2, 49 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v16, 3, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v8, v24, 3, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vslideup.vi v8, v24, 0 +; RV64-NEXT: vslideup.vi v8, v16, 0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 25 +; RV64-NEXT: li a2, 29 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -836,76 +860,89 @@ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vmv.v.i v8, 6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v20, zero +; RV64-NEXT: vmv.s.x v16, zero ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vslideup.vi v8, v20, 5 +; RV64-NEXT: vslideup.vi v8, v16, 5 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 13 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 57 +; RV64-NEXT: li a2, 85 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v12, v24, v8 -; RV64-NEXT: vrgather.vi v12, v16, 4, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v12, v16, v8 +; RV64-NEXT: vrgather.vi v12, v24, 4, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 81 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v0, v8, 2 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v0, v16, 2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 73 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v24, v16, v0 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v24, v0 ; RV64-NEXT: li a1, 24 +; RV64-NEXT: vadd.vi v24, v16, -14 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 21 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v8, v8, -14 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 6 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 61 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 
16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v24, v16, v8, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 21 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v8, v24, 0 +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v12, v8, 0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: li a1, 1 ; RV64-NEXT: vmv.v.i v8, 7 @@ -926,78 +963,78 @@ ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 57 +; RV64-NEXT: li a2, 85 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v20, v16, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 49 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v16, v8, 5, v0.t +; RV64-NEXT: vrgather.vi v20, v8, 5, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 81 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v16, v0, 3 +; RV64-NEXT: vadd.vi v24, v0, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 73 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v8, v0, -13 +; RV64-NEXT: vadd.vi v24, v0, -13 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 6 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 61 +; RV64-NEXT: mul a1, a1, a2 ; 
RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v8, v16, 0 +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v12, v8, 0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 13 @@ -1006,54 +1043,55 @@ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vslideup.vi v16, v8, 6 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: li a1, 192 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 85 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v20, v8, 2 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 57 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v8, 2 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 49 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgather.vv v20, v8, v16, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 81 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v16, v0, 4 +; RV64-NEXT: vadd.vi v24, v0, 4 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 73 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # 
Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 13 ; RV64-NEXT: mul a1, a1, a2 @@ -1061,34 +1099,34 @@ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: li a1, 28 -; RV64-NEXT: vmv.s.x v16, a1 ; RV64-NEXT: vadd.vi v8, v0, -12 -; RV64-NEXT: vmv1r.v v0, v16 -; RV64-NEXT: vmv1r.v v1, v16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vmv.s.x v2, a1 +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 6 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 61 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 13 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v16, v8, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v8, v16, 0 +; RV64-NEXT: vslideup.vi v8, v24, 0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 37 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill @@ -1108,7 +1146,7 @@ ; RV64-NEXT: vslideup.vi v16, v8, 6 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 57 +; RV64-NEXT: li a2, 85 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -1119,7 +1157,7 @@ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 49 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -1127,34 +1165,34 @@ ; RV64-NEXT: vrgather.vv v4, v8, v16, v0.t ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 81 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v16, v8, 5 +; RV64-NEXT: vadd.vi v24, v8, 5 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 73 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 81 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v16, v16, -11 -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vadd.vi v24, v16, -11 +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: 
csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 6 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 61 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vslideup.vi v4, v8, 0 ; RV64-NEXT: addi a1, a0, 320 @@ -1162,15 +1200,15 @@ ; RV64-NEXT: vse64.v v4, (a1) ; RV64-NEXT: addi a1, a0, 256 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 5 -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: li a3, 37 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 192 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 41 +; RV64-NEXT: li a3, 45 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -1178,7 +1216,7 @@ ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 45 +; RV64-NEXT: li a3, 49 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -1186,21 +1224,21 @@ ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 64 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 25 +; RV64-NEXT: li a3, 29 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 90 +; RV64-NEXT: li a1, 94 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -745,12 +745,11 @@ ; RV32-LMULMAX8: # %bb.0: ; RV32-LMULMAX8-NEXT: lui a0, 748388 ; RV32-LMULMAX8-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-LMULMAX8-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV32-LMULMAX8-NEXT: vmv.s.x v8, a0 ; RV32-LMULMAX8-NEXT: lui a0, 748384 ; RV32-LMULMAX8-NEXT: addi a0, a0, 1776 ; RV32-LMULMAX8-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX8-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV32-LMULMAX8-NEXT: vslideup.vi v0, v8, 1 ; RV32-LMULMAX8-NEXT: lui a0, 551776 ; RV32-LMULMAX8-NEXT: addi a0, a0, 1776 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -477,18 +477,16 @@ ; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, 
a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB8_3 ; RV64ZVE32F-NEXT: .LBB8_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 ; RV64ZVE32F-NEXT: beqz a1, .LBB8_4 @@ -545,18 +543,16 @@ ; RV64ZVE32F-NEXT: .LBB9_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_3 ; RV64ZVE32F-NEXT: .LBB9_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 ; RV64ZVE32F-NEXT: beqz a1, .LBB9_4 @@ -647,54 +643,48 @@ ; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_3 ; RV64ZVE32F-NEXT: .LBB11_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_4 ; RV64ZVE32F-NEXT: .LBB11_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_5 ; RV64ZVE32F-NEXT: .LBB11_13: # %cond.load10 ; RV64ZVE32F-NEXT: ld a2, 32(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_6 ; RV64ZVE32F-NEXT: .LBB11_14: # %cond.load13 ; RV64ZVE32F-NEXT: ld a2, 40(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_7 ; RV64ZVE32F-NEXT: .LBB11_15: # %cond.load16 ; RV64ZVE32F-NEXT: ld a2, 48(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x 
v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB11_8 @@ -811,9 +801,8 @@ ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_9 @@ -1210,18 +1199,16 @@ ; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_3 ; RV64ZVE32F-NEXT: .LBB19_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 ; RV64ZVE32F-NEXT: beqz a1, .LBB19_4 @@ -1278,18 +1265,16 @@ ; RV64ZVE32F-NEXT: .LBB20_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_3 ; RV64ZVE32F-NEXT: .LBB20_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 ; RV64ZVE32F-NEXT: beqz a1, .LBB20_4 @@ -1380,54 +1365,48 @@ ; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_3 ; RV64ZVE32F-NEXT: .LBB22_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_4 ; RV64ZVE32F-NEXT: .LBB22_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_5 ; RV64ZVE32F-NEXT: .LBB22_13: # %cond.load10 ; RV64ZVE32F-NEXT: ld a2, 32(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, 
ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_6 ; RV64ZVE32F-NEXT: .LBB22_14: # %cond.load13 ; RV64ZVE32F-NEXT: ld a2, 40(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_7 ; RV64ZVE32F-NEXT: .LBB22_15: # %cond.load16 ; RV64ZVE32F-NEXT: ld a2, 48(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB22_8 @@ -1486,7 +1465,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 @@ -1500,7 +1478,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 @@ -1522,7 +1499,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 5 @@ -1544,7 +1520,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 @@ -1556,9 +1531,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_9 @@ -1568,7 +1542,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 @@ -1581,8 +1554,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -1635,7 +1608,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x 
v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 @@ -1649,7 +1621,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 @@ -1671,7 +1642,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 5 @@ -1693,7 +1663,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 @@ -1705,9 +1674,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_9 @@ -1717,7 +1685,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 @@ -1730,8 +1697,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -1787,7 +1754,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 @@ -1802,7 +1768,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 @@ -1825,7 +1790,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 5 @@ -1848,7 +1812,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 @@ -1861,9 +1824,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; 
RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_9 @@ -1874,7 +1836,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 @@ -1888,8 +1849,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -2010,9 +1971,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_9 @@ -2303,18 +2263,16 @@ ; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_3 ; RV64ZVE32F-NEXT: .LBB31_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 ; RV64ZVE32F-NEXT: beqz a1, .LBB31_4 @@ -2370,18 +2328,16 @@ ; RV64ZVE32F-NEXT: .LBB32_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_3 ; RV64ZVE32F-NEXT: .LBB32_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 ; RV64ZVE32F-NEXT: beqz a1, .LBB32_4 @@ -2472,54 +2428,48 @@ ; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_3 ; RV64ZVE32F-NEXT: .LBB34_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; 
RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_4 ; RV64ZVE32F-NEXT: .LBB34_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_5 ; RV64ZVE32F-NEXT: .LBB34_13: # %cond.load10 ; RV64ZVE32F-NEXT: ld a2, 32(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_6 ; RV64ZVE32F-NEXT: .LBB34_14: # %cond.load13 ; RV64ZVE32F-NEXT: ld a2, 40(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_7 ; RV64ZVE32F-NEXT: .LBB34_15: # %cond.load16 ; RV64ZVE32F-NEXT: ld a2, 48(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB34_8 @@ -2577,7 +2527,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -2591,7 +2540,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -2613,7 +2561,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -2635,7 +2582,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -2647,9 +2593,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_9 @@ -2659,7 +2604,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 
; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -2672,7 +2616,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -2726,7 +2669,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -2740,7 +2682,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -2762,7 +2703,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -2784,7 +2724,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -2796,9 +2735,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_9 @@ -2808,7 +2746,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -2821,7 +2758,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -2878,7 +2814,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -2893,7 +2828,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -2916,7 +2850,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: 
vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -2939,7 +2872,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -2952,9 +2884,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_9 @@ -2965,7 +2896,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -2979,7 +2909,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -3035,7 +2964,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -3049,7 +2977,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -3071,7 +2998,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -3093,7 +3019,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -3105,9 +3030,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_9 @@ -3117,7 +3041,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -3130,7 +3053,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, 
a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -3185,7 +3107,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -3199,7 +3120,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -3221,7 +3141,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -3243,7 +3162,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -3255,9 +3173,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_9 @@ -3267,7 +3184,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -3280,7 +3196,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -3340,7 +3255,6 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -3355,7 +3269,6 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -3378,7 +3291,6 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -3401,7 +3313,6 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; 
RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -3414,9 +3325,8 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_9 @@ -3427,7 +3337,6 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -3441,7 +3350,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -3559,13 +3467,13 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_7 ; RV64ZVE32F-NEXT: .LBB41_14: # %cond.load10 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_8 @@ -7194,18 +7102,16 @@ ; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_3 ; RV64ZVE32F-NEXT: .LBB60_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 ; RV64ZVE32F-NEXT: beqz a1, .LBB60_4 @@ -7262,18 +7168,16 @@ ; RV64ZVE32F-NEXT: .LBB61_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_3 ; RV64ZVE32F-NEXT: .LBB61_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 ; RV64ZVE32F-NEXT: beqz a1, .LBB61_4 @@ -7364,54 +7268,48 @@ ; RV64ZVE32F-NEXT: 
.LBB63_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_3 ; RV64ZVE32F-NEXT: .LBB63_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_4 ; RV64ZVE32F-NEXT: .LBB63_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_5 ; RV64ZVE32F-NEXT: .LBB63_13: # %cond.load10 ; RV64ZVE32F-NEXT: ld a2, 32(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_6 ; RV64ZVE32F-NEXT: .LBB63_14: # %cond.load13 ; RV64ZVE32F-NEXT: ld a2, 40(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_7 ; RV64ZVE32F-NEXT: .LBB63_15: # %cond.load16 ; RV64ZVE32F-NEXT: ld a2, 48(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB63_8 @@ -7470,7 +7368,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 @@ -7484,7 +7381,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 @@ -7506,7 +7402,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 5 @@ -7528,7 +7423,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, 
fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 @@ -7540,9 +7434,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_9 @@ -7552,7 +7445,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 @@ -7565,8 +7457,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7619,7 +7511,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 @@ -7633,7 +7524,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 @@ -7655,7 +7545,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 5 @@ -7677,7 +7566,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 @@ -7689,9 +7577,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_9 @@ -7701,7 +7588,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 @@ -7714,8 +7600,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7771,7 +7657,6 @@ 
; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 @@ -7786,7 +7671,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 @@ -7809,7 +7693,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 5 @@ -7832,7 +7715,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 @@ -7845,9 +7727,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_9 @@ -7858,7 +7739,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 @@ -7872,8 +7752,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 1 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret @@ -7994,9 +7874,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_9 @@ -8161,18 +8040,16 @@ ; RV64ZVE32F-NEXT: .LBB70_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_3 ; RV64ZVE32F-NEXT: .LBB70_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 ; RV64ZVE32F-NEXT: beqz 
a1, .LBB70_4 @@ -8228,18 +8105,16 @@ ; RV64ZVE32F-NEXT: .LBB71_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_3 ; RV64ZVE32F-NEXT: .LBB71_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 ; RV64ZVE32F-NEXT: andi a1, a1, 8 ; RV64ZVE32F-NEXT: beqz a1, .LBB71_4 @@ -8330,54 +8205,48 @@ ; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_3 ; RV64ZVE32F-NEXT: .LBB73_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_4 ; RV64ZVE32F-NEXT: .LBB73_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_5 ; RV64ZVE32F-NEXT: .LBB73_13: # %cond.load10 ; RV64ZVE32F-NEXT: ld a2, 32(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_6 ; RV64ZVE32F-NEXT: .LBB73_14: # %cond.load13 ; RV64ZVE32F-NEXT: ld a2, 40(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_7 ; RV64ZVE32F-NEXT: .LBB73_15: # %cond.load16 ; RV64ZVE32F-NEXT: ld a2, 48(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 ; RV64ZVE32F-NEXT: beqz a1, .LBB73_8 @@ -8435,7 +8304,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: 
vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -8449,7 +8317,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -8471,7 +8338,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -8493,7 +8359,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -8505,9 +8370,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_9 @@ -8517,7 +8381,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -8530,7 +8393,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -8584,7 +8446,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -8598,7 +8459,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -8620,7 +8480,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -8642,7 +8501,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -8654,9 +8512,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: 
vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_9 @@ -8666,7 +8523,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -8679,7 +8535,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -8736,7 +8591,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -8751,7 +8605,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -8774,7 +8627,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -8797,7 +8649,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -8810,9 +8661,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_9 @@ -8823,7 +8673,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -8837,7 +8686,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -8893,7 +8741,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -8907,7 +8754,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; 
RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -8929,7 +8775,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -8951,7 +8796,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -8963,9 +8807,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_9 @@ -8975,7 +8818,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -8988,7 +8830,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -9043,7 +8884,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -9057,7 +8897,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -9079,7 +8918,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -9101,7 +8939,6 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -9113,9 +8950,8 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_9 @@ -9125,7 +8961,6 @@ 
; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -9138,7 +8973,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -9198,7 +9032,6 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -9213,7 +9046,6 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -9236,7 +9068,6 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 @@ -9259,7 +9090,6 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 @@ -9272,9 +9102,8 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_9 @@ -9285,7 +9114,6 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 @@ -9299,7 +9127,6 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 @@ -9417,13 +9244,13 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_7 ; RV64ZVE32F-NEXT: .LBB80_14: # %cond.load10 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_8 @@ -12381,9 +12208,8 @@ ; 
RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v11, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v11, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 4
 ; RV64ZVE32F-NEXT: andi a2, a1, 32
 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_9
@@ -12413,9 +12239,8 @@
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v10, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 8
 ; RV64ZVE32F-NEXT: andi a2, a1, 512
 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
@@ -12436,9 +12261,8 @@
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
 ; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v10, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
+; RV64ZVE32F-NEXT: vmv.s.x v10, a2
 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 12
 ; RV64ZVE32F-NEXT: slli a2, a1, 50
 ; RV64ZVE32F-NEXT: bltz a2, .LBB97_20
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
@@ -129,7 +129,6 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: .LBB8_2:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v25, fa0
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t
@@ -157,7 +156,6 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: .LBB9_2:
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v25, fa0
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -301,10 +301,9 @@
 define float @vreduce_ord_fwadd_v1f32(ptr %x, float %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v1f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT: vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -348,10 +347,9 @@
 define float @vreduce_fwadd_v2f32(ptr %x, float %s) {
 ; CHECK-LABEL: vreduce_fwadd_v2f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT: vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -365,10 +363,9 @@
 define float @vreduce_ord_fwadd_v2f32(ptr %x, float %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v2f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
 ; CHECK-NEXT: vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -412,10 +409,9 @@
 define float @vreduce_fwadd_v4f32(ptr %x, float %s) {
 ; CHECK-LABEL: vreduce_fwadd_v4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT: vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -429,10 +425,9 @@
 define float @vreduce_ord_fwadd_v4f32(ptr %x, float %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; CHECK-NEXT: vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -478,9 +473,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -496,9 +489,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -544,9 +535,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vfwredusum.vs v8, v8, v10
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -562,9 +551,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vfwredosum.vs v8, v8, v10
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -613,9 +600,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vfwredusum.vs v8, v8, v12
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -632,9 +617,7 @@
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vfwredosum.vs v8, v8, v12
 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -692,7 +675,6 @@
 ; CHECK-NEXT: li a0, 32
 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT: vslidedown.vx v16, v8, a0
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v12, fa0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vfwadd.vv v24, v8, v16
@@ -715,7 +697,6 @@
 ; CHECK-NEXT: li a0, 32
 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT: vslidedown.vx v16, v8, a0
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v12, fa0
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vfwredosum.vs v8, v8, v12
@@ -777,10 +758,9 @@
 define double @vreduce_ord_fwadd_v1f64(ptr %x, double %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v1f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT: vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -824,10 +804,9 @@
 define double @vreduce_fwadd_v2f64(ptr %x, double %s) {
 ; CHECK-LABEL: vreduce_fwadd_v2f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT: vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -841,10 +820,9 @@
 define double @vreduce_ord_fwadd_v2f64(ptr %x, double %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_v2f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT: vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -890,9 +868,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -908,9 +884,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v9, fa0
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -956,9 +930,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vfwredusum.vs v8, v8, v10
 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -974,9 +946,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v10, fa0
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vfwredosum.vs v8, v8, v10
 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s fa0, v8
@@ -1022,9 +992,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.s.f v12, fa0
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vfwredusum.vs v8, v8, v12
 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma
 ; CHECK-NEXT: vfmv.f.s
fa0, v8 @@ -1040,9 +1008,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; CHECK-NEXT: vfmv.f.s fa0, v8 @@ -1099,8 +1065,8 @@ ; CHECK-NEXT: vslidedown.vi v16, v8, 16 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfredusum.vs v8, v24, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1118,7 +1084,6 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v8, 16 -; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfwredosum.vs v8, v8, v12 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -853,7 +853,6 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: .LBB49_2: -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v25, a0 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -212,10 +212,9 @@ define i16 @vwreduce_add_v2i16(ptr %x) { ; CHECK-LABEL: vwreduce_add_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -229,10 +228,9 @@ define i16 @vwreduce_uadd_v2i16(ptr %x) { ; CHECK-LABEL: vwreduce_uadd_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -262,10 +260,9 @@ define i16 @vwreduce_add_v4i16(ptr %x) { ; CHECK-LABEL: vwreduce_add_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -279,10 +276,9 @@ define i16 @vwreduce_uadd_v4i16(ptr %x) { ; CHECK-LABEL: vwreduce_uadd_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; 
CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -312,10 +308,9 @@ define i16 @vwreduce_add_v8i16(ptr %x) { ; CHECK-LABEL: vwreduce_add_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -329,10 +324,9 @@ define i16 @vwreduce_uadd_v8i16(ptr %x) { ; CHECK-LABEL: vwreduce_uadd_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -364,9 +358,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -382,9 +374,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -418,9 +408,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -437,9 +425,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -473,9 +459,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -492,9 +476,7 @@ ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 0, e16, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -534,7 +516,6 @@ ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwadd.vv v24, v8, v16 @@ -557,7 +538,6 @@ ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: 
vslidedown.vx v16, v8, a0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwaddu.vv v24, v8, v16 @@ -632,10 +612,9 @@ define i32 @vwreduce_add_v2i32(ptr %x) { ; CHECK-LABEL: vwreduce_add_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -649,10 +628,9 @@ define i32 @vwreduce_uadd_v2i32(ptr %x) { ; CHECK-LABEL: vwreduce_uadd_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -682,10 +660,9 @@ define i32 @vwreduce_add_v4i32(ptr %x) { ; CHECK-LABEL: vwreduce_add_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -699,10 +676,9 @@ define i32 @vwreduce_uadd_v4i32(ptr %x) { ; CHECK-LABEL: vwreduce_uadd_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -734,9 +710,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -752,9 +726,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -786,9 +758,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -804,9 +774,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -840,9 +808,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; 
CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vwredsum.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -859,9 +825,7 @@ ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vwredsumu.vs v8, v8, v12 ; CHECK-NEXT: vsetivli zero, 0, e32, m1, ta, ma ; CHECK-NEXT: vmv.x.s a0, v8 @@ -901,7 +865,6 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vwadd.vv v24, v8, v16 @@ -924,7 +887,6 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vwaddu.vv v24, v8, v16 @@ -1044,10 +1006,9 @@ define i64 @vwreduce_add_v2i64(ptr %x) { ; RV32-LABEL: vwreduce_add_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV32-NEXT: vwredsum.vs v8, v8, v9 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1059,10 +1020,9 @@ ; ; RV64-LABEL: vwreduce_add_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v9 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1076,10 +1036,9 @@ define i64 @vwreduce_uadd_v2i64(ptr %x) { ; RV32-LABEL: vwreduce_uadd_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV32-NEXT: vwredsumu.vs v8, v8, v9 ; RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1091,10 +1050,9 @@ ; ; RV64-LABEL: vwreduce_uadd_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v9 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1139,9 +1097,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vwredsum.vs v8, v8, v9 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1154,9 +1110,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v9 ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma 
; RV64-NEXT: vmv.x.s a0, v8 @@ -1172,9 +1126,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v9, zero -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vwredsumu.vs v8, v8, v9 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1187,9 +1139,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v9, zero -; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v9 ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1234,9 +1184,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v10, zero -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vwredsum.vs v8, v8, v10 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1249,9 +1197,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v10 ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1267,9 +1213,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v10, zero -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vwredsumu.vs v8, v8, v10 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1282,9 +1226,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v10, zero -; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v10 ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1329,9 +1271,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v12, zero -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vwredsum.vs v8, v8, v12 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1344,9 +1284,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, zero -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwredsum.vs v8, v8, v12 ; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1362,9 +1300,7 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.s.x v12, zero -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vwredsumu.vs v8, v8, v12 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vmv.x.s a0, v8 @@ -1377,9 +1313,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v12, zero -; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwredsumu.vs v8, v8, v12 ; RV64-NEXT: vsetivli zero, 0, e64, m1, 
ta, ma ; RV64-NEXT: vmv.x.s a0, v8 @@ -1435,8 +1369,8 @@ ; RV32-NEXT: vslidedown.vi v16, v8, 16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vwadd.vv v24, v8, v16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vmv.s.x v8, zero +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vredsum.vs v8, v24, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1453,8 +1387,8 @@ ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwadd.vv v24, v8, v16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vmv.s.x v8, zero +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vredsum.vs v8, v24, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -1474,8 +1408,8 @@ ; RV32-NEXT: vslidedown.vi v16, v8, 16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vwaddu.vv v24, v8, v16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vmv.s.x v8, zero +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vredsum.vs v8, v24, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1492,8 +1426,8 @@ ; RV64-NEXT: vslidedown.vi v16, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwaddu.vv v24, v8, v16 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vmv.s.x v8, zero +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vredsum.vs v8, v24, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -78,6 +78,7 @@ ; RV32-NEXT: lbu a0, 0(a0) ; RV32-NEXT: slli a1, a1, 8 ; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; RV32-NEXT: vmv.s.x v8, a0 ; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; RV32-NEXT: vslideup.vi v9, v8, 1 @@ -115,6 +116,7 @@ ; RV64-NEXT: lbu a0, 0(a0) ; RV64-NEXT: slli a1, a1, 8 ; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; RV64-NEXT: vmv.s.x v8, a0 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; RV64-NEXT: vslideup.vi v9, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -472,9 +472,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -482,9 +481,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -631,9 +629,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 
; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -641,9 +638,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -799,9 +795,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -809,9 +804,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -1426,9 +1420,8 @@ ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -1436,9 +1429,8 @@ ; CHECK-V-NEXT: mv a0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -1446,9 +1438,8 @@ ; CHECK-V-NEXT: mv a0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 @@ -1456,9 +1447,8 @@ ; CHECK-V-NEXT: mv a0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 @@ -1466,9 +1456,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded 
Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 @@ -1476,9 +1465,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 @@ -1714,9 +1702,8 @@ ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -1724,9 +1711,8 @@ ; CHECK-V-NEXT: mv a0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -1734,9 +1720,8 @@ ; CHECK-V-NEXT: mv a0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 @@ -1744,9 +1729,8 @@ ; CHECK-V-NEXT: mv a0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 @@ -1754,9 +1738,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 @@ -1764,9 +1747,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 @@ -2022,9 +2004,8 @@ ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -2032,9 +2013,8 @@ ; CHECK-V-NEXT: mv a0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: 
fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -2042,9 +2022,8 @@ ; CHECK-V-NEXT: mv a0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 @@ -2052,9 +2031,8 @@ ; CHECK-V-NEXT: mv a0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 @@ -2062,9 +2040,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 @@ -2072,9 +2049,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 @@ -3780,9 +3756,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -3790,9 +3765,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -3937,9 +3911,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -3947,9 +3920,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; 
CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -4104,9 +4076,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e64, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -4114,9 +4085,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -4719,9 +4689,8 @@ ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -4729,9 +4698,8 @@ ; CHECK-V-NEXT: mv a0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -4739,9 +4707,8 @@ ; CHECK-V-NEXT: mv a0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 @@ -4749,9 +4716,8 @@ ; CHECK-V-NEXT: mv a0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 @@ -4759,9 +4725,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 @@ -4769,9 +4734,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 @@ 
-5003,9 +4967,8 @@ ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -5013,9 +4976,8 @@ ; CHECK-V-NEXT: mv a0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -5023,9 +4985,8 @@ ; CHECK-V-NEXT: mv a0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 @@ -5033,9 +4994,8 @@ ; CHECK-V-NEXT: mv a0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 @@ -5043,9 +5003,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 @@ -5053,9 +5012,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 @@ -5310,9 +5268,8 @@ ; CHECK-V-NEXT: mv a0, s6 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 2, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v8, v10, 1 @@ -5320,9 +5277,8 @@ ; CHECK-V-NEXT: mv a0, s5 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 3, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 2 @@ -5330,9 +5286,8 @@ ; CHECK-V-NEXT: mv a0, s4 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, 
e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 3 @@ -5340,9 +5295,8 @@ ; CHECK-V-NEXT: mv a0, s3 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 4 @@ -5350,9 +5304,8 @@ ; CHECK-V-NEXT: mv a0, s2 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 5 @@ -5360,9 +5313,8 @@ ; CHECK-V-NEXT: mv a0, s1 ; CHECK-V-NEXT: call __extendhfsf2@plt ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; CHECK-V-NEXT: vslideup.vi v10, v8, 6 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll @@ -17,9 +17,8 @@ define @insertelt_nxv1f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv1f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf4, tu, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -52,9 +51,8 @@ define @insertelt_nxv2f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv2f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -87,9 +85,8 @@ define @insertelt_nxv4f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv4f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -122,9 +119,8 @@ define @insertelt_nxv8f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv8f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -157,9 +153,8 @@ define @insertelt_nxv16f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv16f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma +; CHECK-NEXT: vfmv.s.f v12, fa0 ; 
CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -192,9 +187,8 @@ define @insertelt_nxv32f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv32f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma +; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 3 @@ -227,9 +221,8 @@ define @insertelt_nxv1f32_imm( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv1f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, mf2, tu, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -262,9 +255,8 @@ define @insertelt_nxv2f32_imm( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv2f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -297,9 +289,8 @@ define @insertelt_nxv4f32_imm( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv4f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -332,9 +323,8 @@ define @insertelt_nxv8f32_imm( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv8f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -367,9 +357,8 @@ define @insertelt_nxv16f32_imm( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv16f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma +; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 3 @@ -402,9 +391,8 @@ define @insertelt_nxv1f64_imm( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv1f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, tu, ma +; CHECK-NEXT: vfmv.s.f v9, fa0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 @@ -437,9 +425,8 @@ define @insertelt_nxv2f64_imm( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv2f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 @@ -472,9 +459,8 @@ define @insertelt_nxv4f64_imm( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv4f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, ma +; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 @@ 
-507,9 +493,8 @@ define @insertelt_nxv8f64_imm( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv8f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll @@ -15,9 +15,8 @@ define @insertelt_nxv1i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv1i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf8, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -50,9 +49,8 @@ define @insertelt_nxv2i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv2i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -85,9 +83,8 @@ define @insertelt_nxv4i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv4i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -120,9 +117,8 @@ define @insertelt_nxv8i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -155,9 +151,8 @@ define @insertelt_nxv16i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m2, tu, ma +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -190,9 +185,8 @@ define @insertelt_nxv32i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m4, tu, ma +; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -225,9 +219,8 @@ define @insertelt_nxv64i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m8, tu, ma +; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -260,9 +253,8 @@ define @insertelt_nxv1i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv1i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf4, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: 
vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -295,9 +287,8 @@ define @insertelt_nxv2i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv2i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -330,9 +321,8 @@ define @insertelt_nxv4i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv4i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -365,9 +355,8 @@ define @insertelt_nxv8i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -400,9 +389,8 @@ define @insertelt_nxv16i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma +; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -435,9 +423,8 @@ define @insertelt_nxv32i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma +; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -470,9 +457,8 @@ define @insertelt_nxv1i32_imm( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv1i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, mf2, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -505,9 +491,8 @@ define @insertelt_nxv2i32_imm( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv2i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -540,9 +525,8 @@ define @insertelt_nxv4i32_imm( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv4i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -575,9 +559,8 @@ define @insertelt_nxv8i32_imm( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv8i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -610,9 +593,8 @@ define @insertelt_nxv16i32_imm( %v, 
i32 %elt) { ; CHECK-LABEL: insertelt_nxv16i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma +; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -800,9 +782,8 @@ ; CHECK-LABEL: insertelt_nxv2i64_imm_c10: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 10 -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, i64 10, i32 3 @@ -838,9 +819,8 @@ ; CHECK-LABEL: insertelt_nxv2i64_imm_cn1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, i64 -1, i32 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -15,9 +15,8 @@ define @insertelt_nxv1i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv1i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf8, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -50,9 +49,8 @@ define @insertelt_nxv2i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv2i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -85,9 +83,8 @@ define @insertelt_nxv4i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv4i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -120,9 +117,8 @@ define @insertelt_nxv8i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -155,9 +151,8 @@ define @insertelt_nxv16i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m2, tu, ma +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -190,9 +185,8 @@ define @insertelt_nxv32i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m4, tu, ma +; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -225,9 +219,8 @@ define 
@insertelt_nxv64i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m8, tu, ma +; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 3 @@ -260,9 +253,8 @@ define @insertelt_nxv1i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv1i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf4, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -295,9 +287,8 @@ define @insertelt_nxv2i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv2i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -330,9 +321,8 @@ define @insertelt_nxv4i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv4i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -365,9 +355,8 @@ define @insertelt_nxv8i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -400,9 +389,8 @@ define @insertelt_nxv16i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma +; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -435,9 +423,8 @@ define @insertelt_nxv32i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma +; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 3 @@ -470,9 +457,8 @@ define @insertelt_nxv1i32_imm( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv1i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, mf2, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -505,9 +491,8 @@ define @insertelt_nxv2i32_imm( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv2i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -540,9 +525,8 @@ define @insertelt_nxv4i32_imm( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv4i32_imm: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -575,9 +559,8 @@ define @insertelt_nxv8i32_imm( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -610,9 +593,8 @@ define @insertelt_nxv16i32_imm( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma +; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 3 @@ -645,9 +627,8 @@ define @insertelt_nxv1i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv1i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, tu, ma +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 3 @@ -682,9 +663,8 @@ define @insertelt_nxv2i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv2i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma +; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vslideup.vi v8, v10, 3 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 3 @@ -719,9 +699,8 @@ define @insertelt_nxv4i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv4i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, ma +; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vslideup.vi v8, v12, 3 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 3 @@ -756,9 +735,8 @@ define @insertelt_nxv8i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv8i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vslideup.vi v8, v16, 3 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -75,14 +75,12 @@ define <16 x i8> @v8i8_2(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: v8i8_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 15 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 255 -; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vrsub.vi v8, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -110,7 +108,7 @@ ; RV32-NEXT: lui a0, %hi(.LCPI7_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0) ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; RV32-NEXT: vle8.v v12, (a0) ; 
RV32-NEXT: vmv1r.v v14, v9 ; RV32-NEXT: vrgather.vv v10, v8, v12 @@ -118,9 +116,7 @@ ; RV32-NEXT: vrsub.vi v8, v8, 15 ; RV32-NEXT: lui a0, 16 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; RV32-NEXT: vrgather.vv v10, v14, v8, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret @@ -130,7 +126,7 @@ ; RV64-NEXT: lui a0, %hi(.LCPI7_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0) ; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; RV64-NEXT: vle8.v v12, (a0) ; RV64-NEXT: vmv1r.v v14, v9 ; RV64-NEXT: vrgather.vv v10, v8, v12 @@ -138,9 +134,7 @@ ; RV64-NEXT: vrsub.vi v8, v8, 15 ; RV64-NEXT: lui a0, 16 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; RV64-NEXT: vrgather.vv v10, v14, v8, v0.t ; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret @@ -191,11 +185,12 @@ define <8 x i16> @v4i16_2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: v4i16_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 15 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t @@ -255,19 +250,17 @@ ; RV32-NEXT: lui a0, %hi(.LCPI15_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI15_0) ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; RV32-NEXT: vle16.v v20, (a0) -; RV32-NEXT: vmv2r.v v16, v10 -; RV32-NEXT: vmv2r.v v12, v8 -; RV32-NEXT: vrgather.vv v8, v12, v20 -; RV32-NEXT: vid.v v12 -; RV32-NEXT: vrsub.vi v12, v12, 15 +; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; RV32-NEXT: vle16.v v16, (a0) +; RV32-NEXT: vmv2r.v v20, v10 +; RV32-NEXT: vrgather.vv v12, v8, v16 +; RV32-NEXT: vid.v v8 +; RV32-NEXT: vrsub.vi v8, v8, 15 ; RV32-NEXT: lui a0, 16 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; RV32-NEXT: vrgather.vv v8, v16, v12, v0.t +; RV32-NEXT: vrgather.vv v12, v20, v8, v0.t +; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; ; RV64-LABEL: v16i16_2: @@ -275,19 +268,17 @@ ; RV64-NEXT: lui a0, %hi(.LCPI15_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI15_0) ; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; RV64-NEXT: vle16.v v20, (a0) -; RV64-NEXT: vmv2r.v v16, v10 -; RV64-NEXT: vmv2r.v v12, v8 -; RV64-NEXT: vrgather.vv v8, v12, v20 -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vrsub.vi v12, v12, 15 +; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; RV64-NEXT: vle16.v v16, (a0) +; RV64-NEXT: vmv2r.v v20, v10 +; RV64-NEXT: vrgather.vv v12, v8, v16 +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vrsub.vi v8, v8, 15 ; RV64-NEXT: lui a0, 16 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; RV64-NEXT: vrgather.vv v8, v16, v12, v0.t +; RV64-NEXT: vrgather.vv v12, v20, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 ; RV64-NEXT: ret %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> ret <32 x i16> %v32i16 @@ -337,11 +328,12 @@ ; CHECK-LABEL: v4i32_2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v12, v9 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: 
vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vid.v v14 ; CHECK-NEXT: vrsub.vi v16, v14, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v16 ; CHECK-NEXT: li a0, 15 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrsub.vi v8, v14, 3 ; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t @@ -368,11 +360,12 @@ ; CHECK-LABEL: v8i32_2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv2r.v v16, v10 -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vid.v v20 ; CHECK-NEXT: vrsub.vi v24, v20, 15 ; CHECK-NEXT: vrgather.vv v12, v8, v24 ; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrsub.vi v8, v20, 7 ; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t @@ -495,8 +488,8 @@ ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; RV32-NEXT: vrgatherei16.vv v12, v8, v19 ; RV32-NEXT: li a0, 15 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vrsub.vi v8, v18, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t @@ -506,11 +499,12 @@ ; RV64-LABEL: v4i64_2: ; RV64: # %bb.0: ; RV64-NEXT: vmv2r.v v16, v10 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vid.v v20 ; RV64-NEXT: vrsub.vi v24, v20, 7 ; RV64-NEXT: vrgather.vv v12, v8, v24 ; RV64-NEXT: li a0, 15 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vrsub.vi v8, v20, 3 ; RV64-NEXT: vrgather.vv v12, v16, v8, v0.t @@ -563,11 +557,12 @@ define <8 x half> @v4f16_2(<4 x half> %a, <4 x half> %b) { ; CHECK-LABEL: v4f16_2: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 15 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrsub.vi v8, v11, 3 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t @@ -683,11 +678,12 @@ ; CHECK-LABEL: v4f32_2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v12, v9 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vid.v v14 ; CHECK-NEXT: vrsub.vi v16, v14, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v16 ; CHECK-NEXT: li a0, 15 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrsub.vi v8, v14, 3 ; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t @@ -714,11 +710,12 @@ ; CHECK-LABEL: v8f32_2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv2r.v v16, v10 -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vid.v v20 ; CHECK-NEXT: vrsub.vi v24, v20, 15 ; CHECK-NEXT: vrgather.vv v12, v8, v24 ; CHECK-NEXT: li a0, 255 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrsub.vi v8, v20, 7 ; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t @@ -788,8 +785,8 @@ ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; RV32-NEXT: vrgatherei16.vv v12, v8, v19 ; RV32-NEXT: li a0, 15 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vrsub.vi v8, v18, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t @@ -799,11 +796,12 @@ ; RV64-LABEL: v4f64_2: ; RV64: # %bb.0: ; RV64-NEXT: 
vmv2r.v v16, v10 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vid.v v20 ; RV64-NEXT: vrsub.vi v24, v20, 7 ; RV64-NEXT: vrgather.vv v12, v8, v24 ; RV64-NEXT: li a0, 15 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vrsub.vi v8, v20, 3 ; RV64-NEXT: vrgather.vv v12, v16, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -15,13 +15,11 @@ ; RV32-NEXT: vrgather.vv v9, v10, v11 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-NEXT: lui a0, 16 ; RV32-NEXT: addi a0, a0, -256 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-NEXT: vadd.vi v12, v11, -16 ; RV32-NEXT: vrgather.vv v9, v8, v12, v0.t ; RV32-NEXT: vmsne.vi v9, v9, 0 @@ -43,13 +41,11 @@ ; RV64-NEXT: vrgather.vv v9, v10, v11 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-NEXT: lui a0, 16 ; RV64-NEXT: addiw a0, a0, -256 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64-NEXT: vadd.vi v12, v11, -16 ; RV64-NEXT: vrgather.vv v9, v8, v12, v0.t ; RV64-NEXT: vmsne.vi v9, v9, 0 @@ -110,8 +106,8 @@ ; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 2 ; CHECK-NEXT: li a0, 2 -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v9, v8, 1 ; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t ; CHECK-NEXT: vslideup.vi v8, v10, 1 @@ -197,8 +193,8 @@ ; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 2 ; CHECK-NEXT: li a0, 2 -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v9, v8, 1 ; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t ; CHECK-NEXT: vslideup.vi v8, v10, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -97,7 +97,6 @@ ; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v24, v0, a2 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: mv a2, a0 @@ -127,7 +126,6 @@ ; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v24, v0, a2 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: vfmv.s.f v25, fa0 ; CHECK-NEXT: mv a2, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll @@ -1162,7 +1162,6 @@ ; RV32-NEXT: sltu a4, a1, a2 ; RV32-NEXT: addi a4, a4, -1 ; 
RV32-NEXT: and a2, a4, a2 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vmv.s.x v25, a0 ; RV32-NEXT: bltu a1, a3, .LBB67_2 ; RV32-NEXT: # %bb.1: @@ -1196,7 +1195,6 @@ ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a3 ; RV64-NEXT: .LBB67_2: -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64-NEXT: vmv.s.x v25, a2 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-NEXT: vredmaxu.vs v25, v8, v25, v0.t