diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3717,10 +3717,16 @@ auto *Store = cast<StoreSDNode>(Op); SDLoc DL(Op); - MVT VT = Store->getValue().getSimpleValueType(); + SDValue StoreVal = Store->getValue(); + MVT VT = StoreVal.getSimpleValueType(); - // FIXME: We probably need to zero any extra bits in a byte for mask stores. - // This is tricky to do. + // If the size is less than a byte, we need to fill the unused bits with 0s. + if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) { + VT = MVT::v8i1; + StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + DAG.getConstant(0, DL, VT), StoreVal, + DAG.getIntPtrConstant(0, DL)); + } MVT ContainerVT = getContainerForFixedLengthVector(VT); @@ -3728,7 +3734,7 @@ DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT()); SDValue NewValue = - convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget); + convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget); return DAG.getMemIntrinsicNode( RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other), {Store->getChain(), NewValue, Store->getBasePtr(), VL}, diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -567,8 +567,16 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu ; CHECK-NEXT: vand.vi v25, v8, 1 -; CHECK-NEXT: vmsne.vi v26, v25, 0 -; CHECK-NEXT: vse1.v v26, (a0) +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a0) ; CHECK-NEXT: ret %y = trunc <2 x i8> %x to <2 x i1> store <2 x i1> %y, <2 x i1>* %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll @@ -353,16 +353,32 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: addi a2, zero, 32 ; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu -; LMULMAX2-NEXT: vle1.v v25, (a0) +; LMULMAX2-NEXT: vle1.v v0, (a0) ; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX2-NEXT: vmv.v.i v25, 0 +; LMULMAX2-NEXT: vmerge.vim v25, v25, 1, v0 +; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX2-NEXT: vmv.v.i v26, 0 +; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; LMULMAX2-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX2-NEXT: vmsne.vi v25, v26, 0 ; LMULMAX2-NEXT: vse1.v v25, (a1) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: extract_v2i1_v64i1_0: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu -; LMULMAX1-NEXT: vle1.v v25, (a0) +; LMULMAX1-NEXT: vle1.v v0, (a0) ; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v25, 0 +; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0 +; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +;
LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vmsne.vi v25, v26, 0 ; LMULMAX1-NEXT: vse1.v v25, (a1) ; LMULMAX1-NEXT: ret %a = load <64 x i1>, <64 x i1>* %x @@ -382,6 +398,14 @@ ; LMULMAX2-NEXT: vsetivli a0, 2, e8,m2,ta,mu ; LMULMAX2-NEXT: vslidedown.vi v26, v26, 2 ; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX2-NEXT: vmsne.vi v0, v26, 0 +; LMULMAX2-NEXT: vmv.v.i v25, 0 +; LMULMAX2-NEXT: vmerge.vim v25, v25, 1, v0 +; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX2-NEXT: vmv.v.i v26, 0 +; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; LMULMAX2-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu ; LMULMAX2-NEXT: vmsne.vi v25, v26, 0 ; LMULMAX2-NEXT: vse1.v v25, (a1) ; LMULMAX2-NEXT: ret @@ -394,8 +418,16 @@ ; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0 ; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu ; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2 -; LMULMAX1-NEXT: vmsne.vi v26, v25, 0 -; LMULMAX1-NEXT: vse1.v v26, (a1) +; LMULMAX1-NEXT: vmsne.vi v0, v25, 0 +; LMULMAX1-NEXT: vmv.v.i v25, 0 +; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0 +; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vmsne.vi v25, v26, 0 +; LMULMAX1-NEXT: vse1.v v25, (a1) ; LMULMAX1-NEXT: ret %a = load <64 x i1>, <64 x i1>* %x %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 2) @@ -415,6 +447,14 @@ ; LMULMAX2-NEXT: vsetivli a0, 2, e8,m2,ta,mu ; LMULMAX2-NEXT: vslidedown.vi v26, v26, 10 ; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu +; LMULMAX2-NEXT: vmsne.vi v0, v26, 0 +; LMULMAX2-NEXT: vmv.v.i v25, 0 +; LMULMAX2-NEXT: vmerge.vim v25, v25, 1, v0 +; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX2-NEXT: vmv.v.i v26, 0 +; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; LMULMAX2-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu ; LMULMAX2-NEXT: vmsne.vi v25, v26, 0 ; LMULMAX2-NEXT: vse1.v v25, (a1) ; LMULMAX2-NEXT: ret @@ -428,8 +468,16 @@ ; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0 ; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu ; LMULMAX1-NEXT: vslidedown.vi v25, v25, 10 -; LMULMAX1-NEXT: vmsne.vi v26, v25, 0 -; LMULMAX1-NEXT: vse1.v v26, (a1) +; LMULMAX1-NEXT: vmsne.vi v0, v25, 0 +; LMULMAX1-NEXT: vmv.v.i v25, 0 +; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0 +; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vmv.v.i v26, 0 +; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; LMULMAX1-NEXT: vslideup.vi v26, v25, 0 +; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; LMULMAX1-NEXT: vmsne.vi v25, v26, 0 +; LMULMAX1-NEXT: vse1.v v25, (a1) ; LMULMAX1-NEXT: ret %a = load <64 x i1>, <64 x i1>* %x %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 42) @@ -441,7 +489,15 @@ ; CHECK-LABEL: extract_v2i1_nxv2i1_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v0, (a0) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a0) ; CHECK-NEXT: ret %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1( %x, i64 0) store <2 x i1> %c, <2 x i1>* %y @@ -457,8 +513,16 @@ ; CHECK-NEXT: vsetivli a1, 2, e8,mf4,ta,mu ; 
CHECK-NEXT: vslidedown.vi v25, v25, 2 ; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu -; CHECK-NEXT: vmsne.vi v26, v25, 0 -; CHECK-NEXT: vse1.v v26, (a0) +; CHECK-NEXT: vmsne.vi v0, v25, 0 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a0) ; CHECK-NEXT: ret %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1( %x, i64 2) store <2 x i1> %c, <2 x i1>* %y @@ -469,7 +533,15 @@ ; CHECK-LABEL: extract_v2i1_nxv64i1_0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v0, (a0) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a0) ; CHECK-NEXT: ret %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1( %x, i64 0) store <2 x i1> %c, <2 x i1>* %y @@ -485,7 +557,15 @@ ; CHECK-NEXT: vsetivli a1, 2, e8,m8,ta,mu ; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu -; CHECK-NEXT: vmsne.vi v25, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a0) ; CHECK-NEXT: ret %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1( %x, i64 2) @@ -503,7 +583,15 @@ ; CHECK-NEXT: vsetivli a2, 2, e8,m8,ta,mu ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu -; CHECK-NEXT: vmsne.vi v25, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a0) ; CHECK-NEXT: ret %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1( %x, i64 42) @@ -520,7 +608,15 @@ ; CHECK-NEXT: vsetivli a1, 2, e8,m4,ta,mu ; CHECK-NEXT: vslidedown.vi v28, v28, 26 ; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu -; CHECK-NEXT: vmsne.vi v25, v28, 0 +; CHECK-NEXT: vmsne.vi v0, v28, 0 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a0) ; CHECK-NEXT: ret %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv32i1( %x, i64 26) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll @@ -42,9 +42,17 @@ ; CHECK-NEXT: vsetivli a3, 4, e32,m1,ta,mu ; CHECK-NEXT: vle32.v v25, (a0) ; 
CHECK-NEXT: vle32.v v26, (a1) -; CHECK-NEXT: vmfne.vv v27, v25, v26 +; CHECK-NEXT: vmfne.vv v0, v25, v26 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a2) ; CHECK-NEXT: ret %a = load <4 x float>, <4 x float>* %x %b = load <4 x float>, <4 x float>* %y @@ -59,9 +67,17 @@ ; CHECK-NEXT: vsetivli a3, 4, e32,m1,ta,mu ; CHECK-NEXT: vle32.v v25, (a0) ; CHECK-NEXT: vle32.v v26, (a1) -; CHECK-NEXT: vmfne.vv v27, v25, v26 +; CHECK-NEXT: vmfne.vv v0, v25, v26 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a2) ; CHECK-NEXT: ret %a = load <4 x float>, <4 x float>* %x %b = load <4 x float>, <4 x float>* %y @@ -76,9 +92,17 @@ ; CHECK-NEXT: vsetivli a3, 2, e64,m1,ta,mu ; CHECK-NEXT: vle64.v v25, (a0) ; CHECK-NEXT: vle64.v v26, (a1) -; CHECK-NEXT: vmflt.vv v27, v26, v25 +; CHECK-NEXT: vmflt.vv v0, v26, v25 ; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a2) ; CHECK-NEXT: ret %a = load <2 x double>, <2 x double>* %x %b = load <2 x double>, <2 x double>* %y @@ -93,9 +117,17 @@ ; CHECK-NEXT: vsetivli a3, 2, e64,m1,ta,mu ; CHECK-NEXT: vle64.v v25, (a0) ; CHECK-NEXT: vle64.v v26, (a1) -; CHECK-NEXT: vmflt.vv v27, v26, v25 +; CHECK-NEXT: vmflt.vv v0, v26, v25 ; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v27, (a2) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a2) ; CHECK-NEXT: ret %a = load <2 x double>, <2 x double>* %x %b = load <2 x double>, <2 x double>* %y @@ -178,8 +210,16 @@ ; CHECK-NEXT: vsetivli a3, 4, e64,m2,ta,mu ; CHECK-NEXT: vle64.v v26, (a0) ; CHECK-NEXT: vle64.v v28, (a1) -; CHECK-NEXT: vmfle.vv v25, v26, v28 +; CHECK-NEXT: vmfle.vv v0, v26, v28 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a2) ; CHECK-NEXT: ret %a = load <4 x double>, <4 x double>* %x @@ -195,8 +235,16 @@ ; CHECK-NEXT: vsetivli a3, 4, e64,m2,ta,mu ; CHECK-NEXT: vle64.v v26, (a0) ; CHECK-NEXT: vle64.v v28, (a1) -; CHECK-NEXT: vmfle.vv v25, v26, 
v28 +; CHECK-NEXT: vmfle.vv v0, v26, v28 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a2) ; CHECK-NEXT: ret %a = load <4 x double>, <4 x double>* %x @@ -433,7 +481,15 @@ ; CHECK-NEXT: vmfeq.vv v27, v25, v25 ; CHECK-NEXT: vmfeq.vv v25, v26, v26 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v25, v25, v27 +; CHECK-NEXT: vmand.mm v0, v25, v27 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a2) ; CHECK-NEXT: ret %a = load <4 x half>, <4 x half>* %x @@ -452,7 +508,15 @@ ; CHECK-NEXT: vmfne.vv v27, v25, v25 ; CHECK-NEXT: vmfne.vv v25, v26, v26 ; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v25, v25, v27 +; CHECK-NEXT: vmor.mm v0, v25, v27 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a2) ; CHECK-NEXT: ret %a = load <2 x half>, <2 x half>* %x @@ -501,9 +565,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 4, e32,m1,ta,mu ; CHECK-NEXT: vle32.v v25, (a0) -; CHECK-NEXT: vmfne.vf v26, v25, fa0 +; CHECK-NEXT: vmfne.vf v0, v25, fa0 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <4 x float>, <4 x float>* %x %b = insertelement <4 x float> undef, float %y, i32 0 @@ -518,9 +590,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 4, e32,m1,ta,mu ; CHECK-NEXT: vle32.v v25, (a0) -; CHECK-NEXT: vmfne.vf v26, v25, fa0 +; CHECK-NEXT: vmfne.vf v0, v25, fa0 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <4 x float>, <4 x float>* %x %b = insertelement <4 x float> undef, float %y, i32 0 @@ -535,9 +615,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; CHECK-NEXT: vle64.v v25, (a0) -; CHECK-NEXT: vmfgt.vf v26, v25, fa0 +; CHECK-NEXT: vmfgt.vf v0, v25, fa0 ; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, 
e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <2 x double>, <2 x double>* %x %b = insertelement <2 x double> undef, double %y, i32 0 @@ -552,9 +640,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; CHECK-NEXT: vle64.v v25, (a0) -; CHECK-NEXT: vmfgt.vf v26, v25, fa0 +; CHECK-NEXT: vmfgt.vf v0, v25, fa0 ; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <2 x double>, <2 x double>* %x %b = insertelement <2 x double> undef, double %y, i32 0 @@ -637,8 +733,16 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 4, e64,m2,ta,mu ; CHECK-NEXT: vle64.v v26, (a0) -; CHECK-NEXT: vmfle.vf v25, v26, fa0 +; CHECK-NEXT: vmfle.vf v0, v26, fa0 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <4 x double>, <4 x double>* %x @@ -654,8 +758,16 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 4, e64,m2,ta,mu ; CHECK-NEXT: vle64.v v26, (a0) -; CHECK-NEXT: vmfle.vf v25, v26, fa0 +; CHECK-NEXT: vmfle.vf v0, v26, fa0 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <4 x double>, <4 x double>* %x @@ -893,7 +1005,15 @@ ; CHECK-NEXT: vmfeq.vf v27, v26, fa0 ; CHECK-NEXT: vmfeq.vv v26, v25, v25 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v25, v26, v27 +; CHECK-NEXT: vmand.mm v0, v26, v27 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <4 x half>, <4 x half>* %x @@ -913,7 +1033,15 @@ ; CHECK-NEXT: vmfne.vf v27, v26, fa0 ; CHECK-NEXT: vmfne.vv v26, v25, v25 ; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v25, v26, v27 +; CHECK-NEXT: vmor.mm v0, v26, v27 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <2 x half>, <2 x half>* %x @@ -963,9 +1091,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 4, e32,m1,ta,mu ; CHECK-NEXT: 
vle32.v v25, (a0) -; CHECK-NEXT: vmfne.vf v26, v25, fa0 +; CHECK-NEXT: vmfne.vf v0, v25, fa0 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <4 x float>, <4 x float>* %x %b = insertelement <4 x float> undef, float %y, i32 0 @@ -980,9 +1116,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 4, e32,m1,ta,mu ; CHECK-NEXT: vle32.v v25, (a0) -; CHECK-NEXT: vmfne.vf v26, v25, fa0 +; CHECK-NEXT: vmfne.vf v0, v25, fa0 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <4 x float>, <4 x float>* %x %b = insertelement <4 x float> undef, float %y, i32 0 @@ -997,9 +1141,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; CHECK-NEXT: vle64.v v25, (a0) -; CHECK-NEXT: vmflt.vf v26, v25, fa0 +; CHECK-NEXT: vmflt.vf v0, v25, fa0 ; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <2 x double>, <2 x double>* %x %b = insertelement <2 x double> undef, double %y, i32 0 @@ -1014,9 +1166,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu ; CHECK-NEXT: vle64.v v25, (a0) -; CHECK-NEXT: vmflt.vf v26, v25, fa0 +; CHECK-NEXT: vmflt.vf v0, v25, fa0 ; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu -; CHECK-NEXT: vse1.v v26, (a1) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 +; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <2 x double>, <2 x double>* %x %b = insertelement <2 x double> undef, double %y, i32 0 @@ -1099,8 +1259,16 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 4, e64,m2,ta,mu ; CHECK-NEXT: vle64.v v26, (a0) -; CHECK-NEXT: vmfge.vf v25, v26, fa0 +; CHECK-NEXT: vmfge.vf v0, v26, fa0 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <4 x double>, <4 x double>* %x @@ -1116,8 +1284,16 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 4, e64,m2,ta,mu ; CHECK-NEXT: vle64.v v26, (a0) -; CHECK-NEXT: vmfge.vf v25, v26, fa0 +; 
CHECK-NEXT: vmfge.vf v0, v26, fa0 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <4 x double>, <4 x double>* %x @@ -1355,7 +1531,15 @@ ; CHECK-NEXT: vmfeq.vv v27, v25, v25 ; CHECK-NEXT: vmfeq.vf v25, v26, fa0 ; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu -; CHECK-NEXT: vmand.mm v25, v25, v27 +; CHECK-NEXT: vmand.mm v0, v25, v27 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <4 x half>, <4 x half>* %x @@ -1375,7 +1559,15 @@ ; CHECK-NEXT: vmfne.vv v27, v25, v25 ; CHECK-NEXT: vmfne.vf v25, v26, fa0 ; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu -; CHECK-NEXT: vmor.mm v25, v25, v27 +; CHECK-NEXT: vmor.mm v0, v25, v27 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <2 x half>, <2 x half>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll @@ -8,7 +8,15 @@ ; CHECK-LABEL: load_store_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 1, e8,m1,ta,mu -; CHECK-NEXT: vle1.v v25, (a0) +; CHECK-NEXT: vle1.v v0, (a0) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 1, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <1 x i1>, <1 x i1>* %x @@ -20,7 +28,15 @@ ; CHECK-LABEL: load_store_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 2, e8,m1,ta,mu -; CHECK-NEXT: vle1.v v25, (a0) +; CHECK-NEXT: vle1.v v0, (a0) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <2 x i1>, <2 x i1>* %x @@ -32,7 +48,15 @@ ; CHECK-LABEL: load_store_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a2, 4, e8,m1,ta,mu -; CHECK-NEXT: vle1.v v25, (a0) +; CHECK-NEXT: vle1.v v0, (a0) +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 
; CHECK-NEXT: vse1.v v25, (a1) ; CHECK-NEXT: ret %a = load <4 x i1>, <4 x i1>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -8,7 +8,15 @@ ; CHECK-LABEL: splat_ones_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu -; CHECK-NEXT: vmset.m v25 +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a1, 1, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a0) ; CHECK-NEXT: ret store <1 x i1> <i1 true>, <1 x i1>* %x @@ -19,7 +27,15 @@ ; CHECK-LABEL: splat_zeros_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu -; CHECK-NEXT: vmclr.m v25 +; CHECK-NEXT: vmclr.m v0 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a0) ; CHECK-NEXT: ret store <2 x i1> zeroinitializer, <2 x i1>* %x @@ -30,7 +46,15 @@ ; CHECK-LABEL: splat_ones_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu -; CHECK-NEXT: vmset.m v25 +; CHECK-NEXT: vmset.m v0 +; CHECK-NEXT: vmv.v.i v25, 0 +; CHECK-NEXT: vmerge.vim v25, v25, 1, v0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.i v26, 0 +; CHECK-NEXT: vsetivli a1, 4, e8,m1,tu,mu +; CHECK-NEXT: vslideup.vi v26, v25, 0 +; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu +; CHECK-NEXT: vmsne.vi v25, v26, 0 ; CHECK-NEXT: vse1.v v25, (a0) ; CHECK-NEXT: ret store <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1>* %x