diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4825,7 +4825,7 @@
   // register size. Therefore we must slide the vector group up the full
   // amount.
   if (SubVecVT.isFixedLengthVector()) {
-    if (OrigIdx == 0 && Vec.isUndef())
+    if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
       return Op;
     MVT ContainerVT = VecVT;
     if (VecVT.isFixedLengthVector()) {
@@ -4835,6 +4835,10 @@
     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SubVec,
                          DAG.getConstant(0, DL, XLenVT));
+    if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
+      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
+      return DAG.getBitcast(Op.getValueType(), SubVec);
+    }
     SDValue Mask =
         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
     // Set the vector length to only the number of elements we care about. Note
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
@@ -847,16 +847,11 @@
 ; LMULMAX1-LABEL: truncstore_v16i16_v16i8:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 8
-; LMULMAX1-NEXT: vse8.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 8
+; LMULMAX1-NEXT: vse8.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v16i16_v16i8:
@@ -1162,20 +1157,16 @@
 ; LMULMAX1-LABEL: truncstore_v8i32_v8i8:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
-; LMULMAX1-NEXT: vse8.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
+; LMULMAX1-NEXT: vse8.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v8i32_v8i8:
@@ -1195,16 +1186,11 @@
 ; LMULMAX1-LABEL: truncstore_v8i32_v8i16:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
-; LMULMAX1-NEXT: vse16.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
+; LMULMAX1-NEXT: vse16.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v8i32_v8i16:
@@ -1284,32 +1270,28 @@
 ; LMULMAX1-LABEL: truncstore_v16i32_v16i8:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 12, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 8
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 8
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 12
-; LMULMAX1-NEXT: vse8.v v12, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 12
+; LMULMAX1-NEXT: vse8.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v16i32_v16i8:
@@ -1329,27 +1311,18 @@
 ; LMULMAX1-LABEL: truncstore_v16i32_v16i16:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v13, v12
-; LMULMAX1-NEXT: vslideup.vi v13, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v8, 4
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 0
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
 ; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse16.v v12, (a1)
-; LMULMAX1-NEXT: vse16.v v13, (a0)
+; LMULMAX1-NEXT: vse16.v v10, (a1)
+; LMULMAX1-NEXT: vse16.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v16i32_v16i16:
@@ -1505,24 +1478,20 @@
 ; LMULMAX1-LABEL: truncstore_v4i64_v4i8:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf4, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 2
-; LMULMAX1-NEXT: vse8.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
+; LMULMAX1-NEXT: vse8.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v4i64_v4i8:
@@ -1544,20 +1513,16 @@
 ; LMULMAX1-LABEL: truncstore_v4i64_v4i16:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 2
-; LMULMAX1-NEXT: vse16.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
+; LMULMAX1-NEXT: vse16.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v4i64_v4i16:
@@ -1577,16 +1542,11 @@
 ; LMULMAX1-LABEL: truncstore_v4i64_v4i32:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 2
-; LMULMAX1-NEXT: vse32.v v10, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
+; LMULMAX1-NEXT: vse32.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v4i64_v4i32:
@@ -1604,40 +1564,36 @@
 ; LMULMAX1-LABEL: truncstore_v8i64_v8i8:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse8.v v12, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
+; LMULMAX1-NEXT: vse8.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v8i64_v8i8:
@@ -1659,32 +1615,28 @@
 ; LMULMAX1-LABEL: truncstore_v8i64_v8i16:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse16.v v12, (a0)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
+; LMULMAX1-NEXT: vse16.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v8i64_v8i16:
@@ -1704,27 +1656,18 @@
 ; LMULMAX1-LABEL: truncstore_v8i64_v8i32:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v13, v12
-; LMULMAX1-NEXT: vslideup.vi v13, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v8, 2
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 0
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 2
 ; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse32.v v12, (a1)
-; LMULMAX1-NEXT: vse32.v v13, (a0)
+; LMULMAX1-NEXT: vse32.v v10, (a1)
+; LMULMAX1-NEXT: vse32.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v8i64_v8i32:
@@ -1742,21 +1685,17 @@
 ; LMULMAX1-LABEL: truncstore_v16i64_v16i8:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v16, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v8, v16, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, m1, tu, mu
 ; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
@@ -1813,24 +1752,20 @@
 ; LMULMAX4-LABEL: truncstore_v16i64_v16i8:
 ; LMULMAX4: # %bb.0:
 ; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX4-NEXT: vnsrl.wi v16, v8, 0
+; LMULMAX4-NEXT: vnsrl.wi v16, v12, 0
 ; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX4-NEXT: vnsrl.wi v8, v16, 0
+; LMULMAX4-NEXT: vnsrl.wi v12, v16, 0
 ; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX4-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX4-NEXT: vmv.v.i v9, 0
-; LMULMAX4-NEXT: vsetivli zero, 8, e8, m1, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v9, v8, 0
-; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX4-NEXT: vnsrl.wi v10, v12, 0
+; LMULMAX4-NEXT: vnsrl.wi v12, v12, 0
+; LMULMAX4-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; LMULMAX4-NEXT: vnsrl.wi v14, v8, 0
 ; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX4-NEXT: vnsrl.wi v8, v10, 0
+; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0
 ; LMULMAX4-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
 ; LMULMAX4-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX4-NEXT: vsetivli zero, 16, e8, m1, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v9, v8, 8
-; LMULMAX4-NEXT: vse8.v v9, (a0)
+; LMULMAX4-NEXT: vslideup.vi v8, v12, 8
+; LMULMAX4-NEXT: vse8.v v8, (a0)
 ; LMULMAX4-NEXT: ret
   %y = trunc <16 x i64> %x to <16 x i8>
   store <16 x i8> %y, <16 x i8>* %z
@@ -1841,59 +1776,52 @@
 ; LMULMAX1-LABEL: truncstore_v16i64_v16i16:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v16, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v17, v16
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 6
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v12, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v13, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v10, v12, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v14, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v14, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v15, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 6
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 6
 ; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse16.v v16, (a1)
-; LMULMAX1-NEXT: vse16.v v17, (a0)
+; LMULMAX1-NEXT: vse16.v v10, (a1)
+; LMULMAX1-NEXT: vse16.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v16i64_v16i16:
@@ -1906,13 +1834,9 @@
 ; LMULMAX4-NEXT: vnsrl.wi v14, v8, 0
 ; LMULMAX4-NEXT: vsetvli zero, zero, e16, m1, ta, mu
 ; LMULMAX4-NEXT: vnsrl.wi v8, v14, 0
-; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, ta, mu
-; LMULMAX4-NEXT: vmv.v.i v10, 0
-; LMULMAX4-NEXT: vsetivli zero, 8, e16, m2, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v10, v8, 0
 ; LMULMAX4-NEXT: vsetivli zero, 16, e16, m2, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v10, v12, 8
-; LMULMAX4-NEXT: vse16.v v10, (a0)
+; LMULMAX4-NEXT: vslideup.vi v8, v12, 8
+; LMULMAX4-NEXT: vse16.v v8, (a0)
 ; LMULMAX4-NEXT: ret
   %y = trunc <16 x i64> %x to <16 x i16>
   store <16 x i16> %y, <16 x i16>* %z
@@ -1923,49 +1847,32 @@
 ; LMULMAX1-LABEL: truncstore_v16i64_v16i32:
 ; LMULMAX1: # %bb.0:
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v16, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v17, v16
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v17, v8, 2
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v9, v16
-; LMULMAX1-NEXT: vslideup.vi v9, v8, 0
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v9, v8, 2
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vmv1r.v v10, v16
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v13, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v13, 0
+; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v11, v9, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v14, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v15, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v15, 0
+; LMULMAX1-NEXT: vnsrl.wi v12, v14, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v16, v8, 2
+; LMULMAX1-NEXT: vslideup.vi v12, v9, 2
 ; LMULMAX1-NEXT: addi a1, a0, 48
-; LMULMAX1-NEXT: vse32.v v16, (a1)
+; LMULMAX1-NEXT: vse32.v v12, (a1)
 ; LMULMAX1-NEXT: addi a1, a0, 32
-; LMULMAX1-NEXT: vse32.v v10, (a1)
+; LMULMAX1-NEXT: vse32.v v11, (a1)
 ; LMULMAX1-NEXT: addi a1, a0, 16
-; LMULMAX1-NEXT: vse32.v v9, (a1)
-; LMULMAX1-NEXT: vse32.v v17, (a0)
+; LMULMAX1-NEXT: vse32.v v10, (a1)
+; LMULMAX1-NEXT: vse32.v v8, (a0)
 ; LMULMAX1-NEXT: ret
 ;
 ; LMULMAX4-LABEL: truncstore_v16i64_v16i32:
@@ -1973,13 +1880,9 @@
 ; LMULMAX4-NEXT: vsetivli zero, 8, e32, m2, ta, mu
 ; LMULMAX4-NEXT: vnsrl.wi v16, v12, 0
 ; LMULMAX4-NEXT: vnsrl.wi v12, v8, 0
-; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, mu
-; LMULMAX4-NEXT: vmv.v.i v8, 0
-; LMULMAX4-NEXT: vsetivli zero, 8, e32, m4, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v8, v12, 0
 ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, tu, mu
-; LMULMAX4-NEXT: vslideup.vi v8, v16, 8
-; LMULMAX4-NEXT: vse32.v v8, (a0)
+; LMULMAX4-NEXT: vslideup.vi v12, v16, 8
+; LMULMAX4-NEXT: vse32.v v12, (a0)
 ; LMULMAX4-NEXT: ret
   %y = trunc <16 x i64> %x to <16 x i32>
   store <16 x i32> %y, <16 x i32>* %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
@@ -157,21 +157,16 @@
 ;
 ; LMULMAX1-LABEL: fpround_v8f32_v8f16:
 ; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi a2, a0, 16
 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu
 ; LMULMAX1-NEXT: vle32.v v8, (a0)
-; LMULMAX1-NEXT: addi a0, a0, 16
-; LMULMAX1-NEXT: vle32.v v9, (a0)
+; LMULMAX1-NEXT: vle32.v v9, (a2)
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
 ; LMULMAX1-NEXT: vfncvt.f.f.w v10, v8
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v8, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v10, v9
+; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9
 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v8, v10, 4
-; LMULMAX1-NEXT: vse16.v v8, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
+; LMULMAX1-NEXT: vse16.v v10, (a1)
 ; LMULMAX1-NEXT: ret
   %a = load <8 x float>, <8 x float>* %x
   %d = fptrunc <8 x float> %a to <8 x half>
@@ -205,29 +200,25 @@
 ; LMULMAX1-NEXT: vfncvt.rod.f.f.w v12, v10
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vfncvt.f.f.w v10, v12
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rod.f.f.w v10, v11
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vfncvt.rod.f.f.w v12, v11
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v11, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v11, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v11, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rod.f.f.w v10, v9
+; LMULMAX1-NEXT: vfncvt.rod.f.f.w v11, v9
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v9, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v9, v11
 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v9, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vfncvt.rod.f.f.w v9, v8
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9
 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse16.v v12, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 6
+; LMULMAX1-NEXT: vse16.v v10, (a1)
 ; LMULMAX1-NEXT: ret
   %a = load <8 x double>, <8 x double>* %x
   %d = fptrunc <8 x double> %a to <8 x half>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -121,32 +121,24 @@
 ;
 ; LMULMAX1-LABEL: fp2si_v8f32_v8i1:
 ; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmclr.m v0
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vmerge.vim v11, v10, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v8
-; LMULMAX1-NEXT: vand.vi v8, v12, 1
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v8
+; LMULMAX1-NEXT: vand.vi v8, v10, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v11, v12, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v11, 0
-; LMULMAX1-NEXT: vmerge.vim v10, v10, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v8, 0
+; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v11, v9
-; LMULMAX1-NEXT: vand.vi v9, v11, 1
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v9
+; LMULMAX1-NEXT: vand.vi v9, v10, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v9, 0
+; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
+; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
 ; LMULMAX1-NEXT: ret
   %z = fptosi <8 x float> %x to <8 x i1>
   ret <8 x i1> %z
@@ -163,32 +155,24 @@
 ;
 ; LMULMAX1-LABEL: fp2ui_v8f32_v8i1:
 ; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmclr.m v0
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vmerge.vim v11, v10, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v8
-; LMULMAX1-NEXT: vand.vi v8, v12, 1
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v8
+; LMULMAX1-NEXT: vand.vi v8, v10, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v11, v12, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v11, 0
-; LMULMAX1-NEXT: vmerge.vim v10, v10, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v8, 0
+; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v11, v9
-; LMULMAX1-NEXT: vand.vi v9, v11, 1
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v9
+; LMULMAX1-NEXT: vand.vi v9, v10, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v9, 0
+; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
+; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
 ; LMULMAX1-NEXT: ret
   %z = fptoui <8 x float> %x to <8 x i1>
   ret <8 x i1> %z
@@ -448,26 +432,22 @@
 ; LMULMAX1-NEXT: vnsrl.wi v10, v12, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v11
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v11
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v11, v11, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v11, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v9
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v11, v9
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v9, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v9, v8
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
@@ -475,8 +455,8 @@
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse8.v v12, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 6
+; LMULMAX1-NEXT: vse8.v v10, (a1)
 ; LMULMAX1-NEXT: ret
   %a = load <8 x double>, <8 x double>* %x
   %d = fptosi <8 x double> %a to <8 x i8>
@@ -514,26 +494,22 @@
 ; LMULMAX1-NEXT: vnsrl.wi v10, v12, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v11
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v11
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v11, v12, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v11, v11, 0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v11, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v9
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v11, v9
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v9, v10, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v11, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v9, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v9, v8
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
@@ -541,8 +517,8 @@
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse8.v v12, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 6
+; LMULMAX1-NEXT: vse8.v v10, (a1)
 ; LMULMAX1-NEXT: ret
   %a = load <8 x double>, <8 x double>* %x
   %d = fptoui <8 x double> %a to <8 x i8>
@@ -561,54 +537,46 @@
 ;
 ; LMULMAX1-LABEL: fp2si_v8f64_v8i1:
 ; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmclr.m v0
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v14, v8
-; LMULMAX1-NEXT: vand.vi v8, v14, 1
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v12, v8
+; LMULMAX1-NEXT: vand.vi v8, v12, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vmerge.vim v14, v8, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v14, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v13, 0
-; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v8, 0
+; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v14, v9
-; LMULMAX1-NEXT: vand.vi v9, v14, 1
+; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v13, v9
+; LMULMAX1-NEXT: vand.vi v9, v13, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v9, v8, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v9, 0
+; LMULMAX1-NEXT: vmerge.vim v13, v9, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v9, 2
+; LMULMAX1-NEXT: vslideup.vi v12, v13, 2
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v13, 0
-; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0
+; LMULMAX1-NEXT: vmsne.vi v0, v12, 0
+; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v13, v10
 ; LMULMAX1-NEXT: vand.vi v10, v13, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v10, v8, 1, v0
+; LMULMAX1-NEXT: vmerge.vim v10, v9, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v9, v10, 4
+; LMULMAX1-NEXT: vslideup.vi v12, v10, 4
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
-; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0
+; LMULMAX1-NEXT: vmsne.vi v0, v12, 0
+; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vfncvt.rtz.x.f.w v10, v11
 ; LMULMAX1-NEXT: vand.vi v10, v10, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
+; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v9, v8, 6
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
+; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
 ; LMULMAX1-NEXT: ret
   %z = fptosi <8 x double> %x to <8 x i1>
   ret <8 x i1> %z
@@ -625,54 +593,46 @@
 ;
 ; LMULMAX1-LABEL: fp2ui_v8f64_v8i1:
 ; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmclr.m v0
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v14, v8
-; LMULMAX1-NEXT: vand.vi v8, v14, 1
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v12, v8
+; LMULMAX1-NEXT: vand.vi v8, v12, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
-; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v8, 0
-; LMULMAX1-NEXT: vmerge.vim v14, v8, 1, v0
-; LMULMAX1-NEXT: vsetivli zero, 2, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v14, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v13, 0
-; LMULMAX1-NEXT: vmerge.vim v13, v12, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v8, 0
+; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v14, v9
-; LMULMAX1-NEXT: vand.vi v9, v14, 1
+; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v13, v9
+; LMULMAX1-NEXT: vand.vi v9, v13, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v9, v8, 1, v0
+; LMULMAX1-NEXT: vmv.v.i v9, 0
+; LMULMAX1-NEXT: vmerge.vim v13, v9, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v13, v9, 2
+; LMULMAX1-NEXT: vslideup.vi v12, v13, 2
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v13, 0
-; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0
+; LMULMAX1-NEXT: vmsne.vi v0, v12, 0
+; LMULMAX1-NEXT: vmerge.vim v12, v8, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v13, v10
 ; LMULMAX1-NEXT: vand.vi v10, v13, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v10, v8, 1, v0
+; LMULMAX1-NEXT: vmerge.vim v10, v9, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 6, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v9, v10, 4
+; LMULMAX1-NEXT: vslideup.vi v12, v10, 4
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
-; LMULMAX1-NEXT: vmerge.vim v9, v12, 1, v0
+; LMULMAX1-NEXT: vmsne.vi v0, v12, 0
+; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vfncvt.rtz.xu.f.w v10, v11
 ; LMULMAX1-NEXT: vand.vi v10, v10, 1
 ; LMULMAX1-NEXT: vmsne.vi v0, v10, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0
+; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v9, v8, 6
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 6
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmsne.vi v0, v9, 0
+; LMULMAX1-NEXT: vmsne.vi v0, v8, 0
 ; LMULMAX1-NEXT: ret
   %z = fptoui <8 x double> %x to <8 x i1>
   ret <8 x i1> %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -478,29 +478,25 @@
 ; LMULMAX1-NEXT: vfncvt.f.x.w v12, v10
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vfncvt.f.f.w v10, v12
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.x.w v10, v11
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vfncvt.f.x.w v12, v11
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v11, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v11, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v11, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.x.w v10, v9
+; LMULMAX1-NEXT: vfncvt.f.x.w v11, v9
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v9, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v9, v11
 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v9, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vfncvt.f.x.w v9, v8
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9
 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse16.v v12, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 6
+; LMULMAX1-NEXT: vse16.v v10, (a1)
 ; LMULMAX1-NEXT: ret
   %a = load <8 x i64>, <8 x i64>* %x
   %d = sitofp <8 x i64> %a to <8 x half>
@@ -534,29 +530,25 @@
 ; LMULMAX1-NEXT: vfncvt.f.xu.w v12, v10
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vfncvt.f.f.w v10, v12
-; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v12, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.xu.w v10, v11
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; LMULMAX1-NEXT: vfncvt.f.xu.w v12, v11
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v11, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v11, v12
 ; LMULMAX1-NEXT: vsetivli zero, 4, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v11, 2
+; LMULMAX1-NEXT: vslideup.vi v10, v11, 2
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.xu.w v10, v9
+; LMULMAX1-NEXT: vfncvt.f.xu.w v11, v9
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT: vfncvt.f.f.w v9, v10
+; LMULMAX1-NEXT: vfncvt.f.f.w v9, v11
 ; LMULMAX1-NEXT: vsetivli zero, 6, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v9, 4
+; LMULMAX1-NEXT: vslideup.vi v10, v9, 4
 ; LMULMAX1-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT: vfncvt.f.xu.w v9, v8
 ; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
 ; LMULMAX1-NEXT: vfncvt.f.f.w v8, v9
 ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v12, v8, 6
-; LMULMAX1-NEXT: vse16.v v12, (a1)
+; LMULMAX1-NEXT: vslideup.vi v10, v8, 6
+; LMULMAX1-NEXT: vse16.v v10, (a1)
 ; LMULMAX1-NEXT: ret
   %a = load <8 x i64>, <8 x i64>* %x
   %d = uitofp <8 x i64> %a to <8 x half>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -152,11 +152,7 @@
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
 ; CHECK-NEXT: vle32.v v8, (a1)
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu
-; CHECK-NEXT: vslideup.vi v9, v8, 0
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vse32.v v9, (a0)
+; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
   %sv = load <2 x i32>, <2 x i32>* %svp
   %v = call <4 x i32> @llvm.experimental.vector.insert.v2i32.v4i32(<4 x i32> undef, <2 x i32> %sv, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -43,28 +43,24 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
 ; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vsetivli zero, 2, e64, m2, tu, mu
-; RV32-NEXT: vslideup.vi v10, v8, 0
 ; RV32-NEXT: lw a3, 16(a0)
 ; RV32-NEXT: addi a4, a0, 20
 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT: vlse32.v v8, (a4), zero
+; RV32-NEXT: vlse32.v v10, (a4), zero
 ; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, mu
-; RV32-NEXT: vmv.s.x v8, a3
+; RV32-NEXT: vmv.s.x v10, a3
 ; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, mu
-; RV32-NEXT: vslideup.vi v10, v8, 2
+; RV32-NEXT: vslideup.vi v8, v10, 2
 ; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, mu
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vslide1up.vx v12, v8, a2
-; RV32-NEXT: vslide1up.vx v8, v12, a1
+; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vslide1up.vx v12, v10, a2
+; RV32-NEXT: vslide1up.vx v10, v12, a1
 ; RV32-NEXT: vsetivli zero, 3, e64, m2, tu, mu
-; RV32-NEXT: vslideup.vi v10, v8, 2
+; RV32-NEXT: vslideup.vi v8, v10, 2
 ; RV32-NEXT: sw a1, 16(a0)
 ; RV32-NEXT: sw a2, 20(a0)
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT: vse64.v v10, (a0)
+; RV32-NEXT: vse64.v v8, (a0)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: insertelt_v3i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -213,17 +213,13 @@
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
 ; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT: vmv.v.i v10, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 0
-; LMULMAX1-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v9, 0
+; LMULMAX1-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT: vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT: vnsrl.wi v9, v9, 0
 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, tu, mu
-; LMULMAX1-NEXT: vslideup.vi v10, v8, 4
-; LMULMAX1-NEXT: vse8.v v10, (a1)
+; LMULMAX1-NEXT: vslideup.vi v8, v9, 4
+; LMULMAX1-NEXT: vse8.v v8, (a1)
 ; LMULMAX1-NEXT: ret
   %a = load <8 x i32>, <8 x i32>* %x
   %b = trunc <8 x i32> %a to <8 x i8>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2173,28 +2173,24 @@
 ;
 ; RV64-LABEL: mgather_baseidx_v32i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: vmv1r.v v12, v0
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT: vsext.vf8 v16, v8
+; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV64-NEXT: vmv1r.v v12, v10
+; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t
 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu
-; RV64-NEXT: vslidedown.vi v14, v10, 16
-; RV64-NEXT: vslidedown.vi v16, v8, 16
+; RV64-NEXT: vslidedown.vi v10, v10, 16
+; RV64-NEXT: vslidedown.vi v8, v8, 16
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf8 v24, v16
+; RV64-NEXT: vsext.vf8 v16, v8
 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
 ; RV64-NEXT: vslidedown.vi v0, v0, 2
 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64-NEXT: vluxei64.v v14, (a0), v24, v0.t
-; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV64-NEXT: vsext.vf8 v16, v8
-; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; RV64-NEXT: vmv1r.v v0, v12
 ; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t
 ; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, mu
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vsetivli zero, 16, e8, m2, tu, mu
-; RV64-NEXT: vslideup.vi v8, v10, 0
 ; RV64-NEXT: vsetvli zero, a0, e8, m2, tu, mu
-; RV64-NEXT: vslideup.vi v8, v14, 16
+; RV64-NEXT: vslideup.vi v12, v10, 16
+; RV64-NEXT: vmv2r.v v8, v12
 ; RV64-NEXT: ret
   %ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
   %v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)