diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -577,7 +577,8 @@
       // Mask VTs are custom-expanded into a series of standard nodes
       setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
-                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
+                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+                          ISD::SCALAR_TO_VECTOR},
                          VT, Custom);
 
       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
@@ -699,9 +700,9 @@
                           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
                           ISD::VP_SCATTER},
                          VT, Custom);
-      setOperationAction(
-          {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
-          VT, Custom);
+      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+                         VT, Custom);
 
       setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -794,9 +795,9 @@
       setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SELECT_CC, VT, Expand);
 
-      setOperationAction(
-          {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
-          VT, Custom);
+      setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+                         VT, Custom);
 
       setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
       setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
@@ -873,6 +874,8 @@
         setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
                            VT, Custom);
 
+        setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+
         setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
 
         setOperationAction(ISD::SETCC, VT, Custom);
@@ -4097,6 +4100,28 @@
     return lowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT:
     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+  case ISD::SCALAR_TO_VECTOR: {
+    MVT VT = Op.getSimpleValueType();
+    SDLoc DL(Op);
+    SDValue Scalar = Op.getOperand(0);
+    if (VT.getVectorElementType() == MVT::i1) {
+      MVT WideVT = VT.changeVectorElementType(MVT::i8);
+      SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
+      return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
+    }
+    MVT ContainerVT = VT;
+    if (VT.isFixedLengthVector())
+      ContainerVT = getContainerForFixedLengthVector(VT);
+    SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+    // vmv.s.x implicitly truncates to SEW, so any extension to XLenVT is fine.
+    if (Scalar.getValueType().isInteger())
+      Scalar = DAG.getZExtOrTrunc(Scalar, DL, Subtarget.getXLenVT());
+    SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
+                            DAG.getUNDEF(ContainerVT), Scalar, VL);
+    if (VT.isFixedLengthVector())
+      V = convertFromScalableVector(VT, V, DAG, Subtarget);
+    return V;
+  }
   case ISD::VSCALE: {
     MVT VT = Op.getSimpleValueType();
     SDLoc DL(Op);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll
@@ -7,8 +7,6 @@
 define <4 x i1> @load_large_vector(ptr %p) {
 ; ZVE32X-LABEL: load_large_vector:
 ; ZVE32X:       # %bb.0:
-; ZVE32X-NEXT:    addi sp, sp, -16
-; ZVE32X-NEXT:    .cfi_def_cfa_offset 16
 ; ZVE32X-NEXT:    ld a1, 80(a0)
 ; ZVE32X-NEXT:    ld a2, 72(a0)
 ; ZVE32X-NEXT:    ld a3, 56(a0)
@@ -19,49 +17,47 @@
 ; ZVE32X-NEXT:    ld a0, 0(a0)
 ; ZVE32X-NEXT:    xor a4, a5, a4
 ; ZVE32X-NEXT:    snez a4, a4
-; ZVE32X-NEXT:    sb a4, 12(sp)
+; ZVE32X-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; ZVE32X-NEXT:    vmv.s.x v8, a4
+; ZVE32X-NEXT:    vand.vi v8, v8, 1
+; ZVE32X-NEXT:    vmsne.vi v0, v8, 0
+; ZVE32X-NEXT:    vmv.v.i v8, 0
+; ZVE32X-NEXT:    vmerge.vim v9, v8, 1, v0
 ; ZVE32X-NEXT:    xor a0, a0, a7
 ; ZVE32X-NEXT:    snez a0, a0
-; ZVE32X-NEXT:    sb a0, 15(sp)
-; ZVE32X-NEXT:    xor a0, a6, a3
-; ZVE32X-NEXT:    snez a0, a0
-; ZVE32X-NEXT:    sb a0, 13(sp)
-; ZVE32X-NEXT:    xor a1, a2, a1
-; ZVE32X-NEXT:    snez a0, a1
-; ZVE32X-NEXT:    sb a0, 14(sp)
-; ZVE32X-NEXT:    addi a0, sp, 12
-; ZVE32X-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
-; ZVE32X-NEXT:    vlm.v v0, (a0)
-; ZVE32X-NEXT:    addi a0, sp, 15
-; ZVE32X-NEXT:    vlm.v v8, (a0)
-; ZVE32X-NEXT:    vmv.v.i v9, 0
-; ZVE32X-NEXT:    vmerge.vim v10, v9, 1, v0
+; ZVE32X-NEXT:    vmv.s.x v10, a0
+; ZVE32X-NEXT:    vand.vi v10, v10, 1
+; ZVE32X-NEXT:    vmsne.vi v0, v10, 0
 ; ZVE32X-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32X-NEXT:    vmv.v.i v11, 0
-; ZVE32X-NEXT:    vmv1r.v v0, v8
-; ZVE32X-NEXT:    vmerge.vim v8, v11, 1, v0
+; ZVE32X-NEXT:    vmv.v.i v10, 0
+; ZVE32X-NEXT:    vmerge.vim v11, v10, 1, v0
 ; ZVE32X-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
-; ZVE32X-NEXT:    vslideup.vi v8, v10, 1
+; ZVE32X-NEXT:    vslideup.vi v11, v9, 1
 ; ZVE32X-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32X-NEXT:    vmsne.vi v0, v8, 0
-; ZVE32X-NEXT:    vmerge.vim v8, v11, 1, v0
-; ZVE32X-NEXT:    addi a0, sp, 13
+; ZVE32X-NEXT:    vmsne.vi v0, v11, 0
+; ZVE32X-NEXT:    vmerge.vim v9, v10, 1, v0
+; ZVE32X-NEXT:    xor a0, a6, a3
+; ZVE32X-NEXT:    snez a0, a0
+; ZVE32X-NEXT:    vmv.s.x v11, a0
 ; ZVE32X-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
-; ZVE32X-NEXT:    vlm.v v0, (a0)
-; ZVE32X-NEXT:    vmerge.vim v10, v9, 1, v0
+; ZVE32X-NEXT:    vand.vi v11, v11, 1
+; ZVE32X-NEXT:    vmsne.vi v0, v11, 0
+; ZVE32X-NEXT:    vmerge.vim v11, v8, 1, v0
 ; ZVE32X-NEXT:    vsetivli zero, 3, e8, mf4, tu, ma
-; ZVE32X-NEXT:    vslideup.vi v8, v10, 2
+; ZVE32X-NEXT:    vslideup.vi v9, v11, 2
 ; ZVE32X-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32X-NEXT:    vmsne.vi v0, v8, 0
-; ZVE32X-NEXT:    vmerge.vim v8, v11, 1, v0
-; ZVE32X-NEXT:    addi a0, sp, 14
+; ZVE32X-NEXT:    vmsne.vi v0, v9, 0
+; ZVE32X-NEXT:    vmerge.vim v9, v10, 1, v0
+; ZVE32X-NEXT:    xor a1, a2, a1
+; ZVE32X-NEXT:    snez a0, a1
+; ZVE32X-NEXT:    vmv.s.x v10, a0
 ; ZVE32X-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
-; ZVE32X-NEXT:    vlm.v v0, (a0)
-; ZVE32X-NEXT:    vmerge.vim v9, v9, 1, v0
+; ZVE32X-NEXT:    vand.vi v10, v10, 1
+; ZVE32X-NEXT:    vmsne.vi v0, v10, 0
+; ZVE32X-NEXT:    vmerge.vim v8, v8, 1, v0
 ; ZVE32X-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; ZVE32X-NEXT:    vslideup.vi v8, v9, 3
-; ZVE32X-NEXT:    vmsne.vi v0, v8, 0
-; ZVE32X-NEXT:    addi sp, sp, 16
+; ZVE32X-NEXT:    vslideup.vi v9, v8, 3
+; ZVE32X-NEXT:    vmsne.vi v0, v9, 0
 ; ZVE32X-NEXT:    ret
 ;
 ; ZVE64X-LABEL: load_large_vector:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
@@ -11,14 +11,8 @@
 ;
 ; RV64-LABEL: load_v5i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
-; RV64-NEXT:    ld a0, 0(a0)
-; RV64-NEXT:    sd a0, 8(sp)
-; RV64-NEXT:    addi a0, sp, 8
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    vlse64.v v8, (a0), zero
 ; RV64-NEXT:    ret
   %x = load <5 x i8>, ptr %p
   ret <5 x i8> %x
@@ -33,14 +27,8 @@
 ;
 ; RV64-LABEL: load_v6i8:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -16
-; RV64-NEXT:    .cfi_def_cfa_offset 16
-; RV64-NEXT:    ld a0, 0(a0)
-; RV64-NEXT:    sd a0, 8(sp)
-; RV64-NEXT:    addi a0, sp, 8
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    vlse64.v v8, (a0), zero
 ; RV64-NEXT:    ret
   %x = load <6 x i8>, ptr %p
   ret <6 x i8> %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -220,35 +220,20 @@
 ;
 ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
 ; RV32ZVE32F:       # %bb.0:
-; RV32ZVE32F-NEXT:    addi sp, sp, -16
-; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
 ; RV32ZVE32F-NEXT:    lw a1, 8(a0)
 ; RV32ZVE32F-NEXT:    lw a0, 0(a0)
-; RV32ZVE32F-NEXT:    sb a1, 15(sp)
-; RV32ZVE32F-NEXT:    sb a0, 14(sp)
-; RV32ZVE32F-NEXT:    addi a0, sp, 15
-; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
-; RV32ZVE32F-NEXT:    vle8.v v9, (a0)
-; RV32ZVE32F-NEXT:    addi a0, sp, 14
-; RV32ZVE32F-NEXT:    vle8.v v10, (a0)
 ; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV32ZVE32F-NEXT:    vmv.s.x v9, a1
+; RV32ZVE32F-NEXT:    vmv.s.x v10, a0
 ; RV32ZVE32F-NEXT:    vslideup.vi v10, v9, 1
 ; RV32ZVE32F-NEXT:    vsoxei32.v v10, (zero), v8, v0.t
-; RV32ZVE32F-NEXT:    addi sp, sp, 16
 ; RV32ZVE32F-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
 ; RV64ZVE32F:       # %bb.0:
-; RV64ZVE32F-NEXT:    addi sp, sp, -16
-; RV64ZVE32F-NEXT:    .cfi_def_cfa_offset 16
-; RV64ZVE32F-NEXT:    sb a1, 15(sp)
-; RV64ZVE32F-NEXT:    sb a0, 14(sp)
-; RV64ZVE32F-NEXT:    addi a0, sp, 15
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT:    vle8.v v9, (a0)
-; RV64ZVE32F-NEXT:    addi a0, sp, 14
-; RV64ZVE32F-NEXT:    vle8.v v8, (a0)
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vmv.s.x v9, a1
+; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
 ; RV64ZVE32F-NEXT:    vmv.x.s a0, v0
 ; RV64ZVE32F-NEXT:    andi a1, a0, 1
 ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
@@ -257,7 +242,6 @@
 ; RV64ZVE32F-NEXT:    andi a0, a0, 2
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB4_4
 ; RV64ZVE32F-NEXT:  .LBB4_2: # %else2
-; RV64ZVE32F-NEXT:    addi sp, sp, 16
 ; RV64ZVE32F-NEXT:    ret
 ; RV64ZVE32F-NEXT:  .LBB4_3: # %cond.store
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
@@ -268,7 +252,6 @@
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
 ; RV64ZVE32F-NEXT:    vse8.v v8, (a3)
-; RV64ZVE32F-NEXT:    addi sp, sp, 16
 ; RV64ZVE32F-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i8>
   call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
@@ -782,35 +765,20 @@
 ;
 ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
 ; RV32ZVE32F:       # %bb.0:
-; RV32ZVE32F-NEXT:    addi sp, sp, -16
-; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
 ; RV32ZVE32F-NEXT:    lw a1, 8(a0)
 ; RV32ZVE32F-NEXT:    lw a0, 0(a0)
-; RV32ZVE32F-NEXT:    sh a1, 14(sp)
-; RV32ZVE32F-NEXT:    sh a0, 12(sp)
-; RV32ZVE32F-NEXT:    addi a0, sp, 14
-; RV32ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
-; RV32ZVE32F-NEXT:    vle16.v v9, (a0)
-; RV32ZVE32F-NEXT:    addi a0, sp, 12
-; RV32ZVE32F-NEXT:    vle16.v v10, (a0)
 ; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
+; RV32ZVE32F-NEXT:    vmv.s.x v9, a1
+; RV32ZVE32F-NEXT:    vmv.s.x v10, a0
 ; RV32ZVE32F-NEXT:    vslideup.vi v10, v9, 1
 ; RV32ZVE32F-NEXT:    vsoxei32.v v10, (zero), v8, v0.t
-; RV32ZVE32F-NEXT:    addi sp, sp, 16
 ; RV32ZVE32F-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
 ; RV64ZVE32F:       # %bb.0:
-; RV64ZVE32F-NEXT:    addi sp, sp, -16
-; RV64ZVE32F-NEXT:    .cfi_def_cfa_offset 16
-; RV64ZVE32F-NEXT:    sh a1, 14(sp)
-; RV64ZVE32F-NEXT:    sh a0, 12(sp)
-; RV64ZVE32F-NEXT:    addi a0, sp, 14
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT:    vle16.v v9, (a0)
-; RV64ZVE32F-NEXT:    addi a0, sp, 12
-; RV64ZVE32F-NEXT:    vle16.v v8, (a0)
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vmv.s.x v9, a1
+; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
 ; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a0, v0
@@ -820,7 +788,6 @@
 ; RV64ZVE32F-NEXT:    andi a0, a0, 2
 ; RV64ZVE32F-NEXT:    bnez a0, .LBB13_4
 ; RV64ZVE32F-NEXT:  .LBB13_2: # %else2
-; RV64ZVE32F-NEXT:    addi sp, sp, 16
 ; RV64ZVE32F-NEXT:    ret
 ; RV64ZVE32F-NEXT:  .LBB13_3: # %cond.store
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
@@ -831,7 +798,6 @@
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
 ; RV64ZVE32F-NEXT:    vse16.v v8, (a3)
-; RV64ZVE32F-NEXT:    addi sp, sp, 16
 ; RV64ZVE32F-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i16>
   call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
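For context, and not part of the patch itself: ISD::SCALAR_TO_VECTOR puts a scalar into element 0 of an otherwise undefined vector, and without a custom lowering SelectionDAG expands it through a stack temporary (store the scalar, then reload it as a vector), which is exactly the addi sp/sb/sd/vle sequences removed from the tests above. The snippet below is a hypothetical reproducer, not one of the patch's test cases; the function name and run flags are assumptions. On an RV64 target with only Zve32f, <2 x i64> is not a legal vector type, so the trunc is scalarized and the <2 x i16> result is rebuilt from scalars through SCALAR_TO_VECTOR, which this lowering turns into vmv.s.x plus a vslideup, mirroring the mscatter_v2i64_truncstore_v2i16 diff above.

; Hypothetical reproducer (assumed flags: -mtriple=riscv64 -mattr=+zve32f,+zvl128b).
; The 64-bit lanes are truncated in scalar registers and the <2 x i16> result is
; rebuilt via ISD::SCALAR_TO_VECTOR, now lowered to vmv.s.x instead of a stack
; round trip.
define void @truncstore_v2i64_v2i16(<2 x i64> %val, ptr %p) {
  %t = trunc <2 x i64> %val to <2 x i16>
  store <2 x i16> %t, ptr %p
  ret void
}

For i1 result vectors the same path first builds an i8 vector and truncates it back to a mask, which is why the zve32x interleaved-access test now checks a vmv.s.x/vand.vi/vmsne.vi sequence in place of the old vlm.v loads from the stack slot.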