Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -12170,7 +12170,34 @@ break; auto *Store = cast(N); + EVT MemVT = Store->getMemoryVT(); SDValue Val = Store->getValue(); + + // FLAGIT + // Using vector to store zeros requires e.g.: + // vsetivli zero, 2, e64, m1, ta, ma + // vmv.v.i v8, 0 + // vse64.v v8, (a0) + // If sufficiently aligned, we can use at most two scalar stores to zero + // initialize any power-of-two size up to XLen * 2 bits. + if (DCI.isBeforeLegalize() && !Store->isTruncatingStore() && + !Store->isIndexed() && ISD::isBuildVectorAllZeros(Val.getNode()) && + MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) && + isPowerOf2_64(MemVT.getSizeInBits()) && + MemVT.getSizeInBits() <= Subtarget.getXLen() * 2) { + assert(!MemVT.isScalableVector()); + auto NewVT = MVT::getIntegerVT(MemVT.getSizeInBits()); + if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), + NewVT, *Store->getMemOperand())) { + SDLoc DL(N); + SDValue Chain = Store->getChain(); + auto NewV = DAG.getConstant(0, DL, NewVT); + return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(), + Store->getPointerInfo(), Store->getOriginalAlign(), + Store->getMemOperand()->getFlags()); + } + } + // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1. // vfmv.f.s is represented as extract element from 0. Match it late to avoid // any illegal types. @@ -12180,7 +12207,6 @@ isNullConstant(Val.getOperand(1)))) { SDValue Src = Val.getOperand(0); MVT VecVT = Src.getSimpleValueType(); - EVT MemVT = Store->getMemoryVT(); // VecVT should be scalable and memory VT should match the element type. if (VecVT.isScalableVector() && MemVT == VecVT.getVectorElementType()) { Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll @@ -1350,24 +1350,26 @@ ; LMULMAX1-NEXT: .cfi_def_cfa_offset 144 ; LMULMAX1-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; LMULMAX1-NEXT: .cfi_offset ra, -8 +; LMULMAX1-NEXT: sd zero, 120(sp) +; LMULMAX1-NEXT: sd zero, 112(sp) +; LMULMAX1-NEXT: sd zero, 104(sp) +; LMULMAX1-NEXT: sd zero, 96(sp) +; LMULMAX1-NEXT: sd zero, 88(sp) +; LMULMAX1-NEXT: sd zero, 80(sp) +; LMULMAX1-NEXT: sd zero, 72(sp) +; LMULMAX1-NEXT: sd zero, 64(sp) +; LMULMAX1-NEXT: sd zero, 56(sp) +; LMULMAX1-NEXT: sd zero, 48(sp) +; LMULMAX1-NEXT: sd zero, 40(sp) +; LMULMAX1-NEXT: sd zero, 32(sp) +; LMULMAX1-NEXT: sd zero, 24(sp) +; LMULMAX1-NEXT: sd zero, 16(sp) +; LMULMAX1-NEXT: sd zero, 8(sp) +; LMULMAX1-NEXT: sd zero, 0(sp) ; LMULMAX1-NEXT: li a0, 8 ; LMULMAX1-NEXT: sd a0, 128(sp) ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse32.v v8, (sp) -; LMULMAX1-NEXT: addi a0, sp, 112 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 96 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 80 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 64 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 48 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 32 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 16 ; LMULMAX1-NEXT: li a1, 1 ; LMULMAX1-NEXT: li a2, 2 ; LMULMAX1-NEXT: li a3, 3 @@ -1375,7 +1377,6 @@ ; LMULMAX1-NEXT: li a5, 5 ; LMULMAX1-NEXT: li a6, 6 ; LMULMAX1-NEXT: li a7, 7 -; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: li a0, 0 ; LMULMAX1-NEXT: vmv.v.i v9, 0 ; LMULMAX1-NEXT: vmv.v.i v10, 0 @@ -1546,59 +1547,60 @@ ; LMULMAX1-NEXT: .cfi_def_cfa_offset 160 ; LMULMAX1-NEXT: sd ra, 152(sp) # 8-byte Folded Spill ; LMULMAX1-NEXT: .cfi_offset ra, -8 +; LMULMAX1-NEXT: sd zero, 120(sp) +; LMULMAX1-NEXT: sd zero, 112(sp) +; LMULMAX1-NEXT: sd zero, 104(sp) +; LMULMAX1-NEXT: sd zero, 96(sp) +; LMULMAX1-NEXT: sd zero, 88(sp) +; LMULMAX1-NEXT: sd zero, 80(sp) +; LMULMAX1-NEXT: sd zero, 72(sp) +; LMULMAX1-NEXT: sd zero, 64(sp) +; LMULMAX1-NEXT: sd zero, 56(sp) +; LMULMAX1-NEXT: sd zero, 48(sp) +; LMULMAX1-NEXT: sd zero, 40(sp) +; LMULMAX1-NEXT: sd zero, 32(sp) +; LMULMAX1-NEXT: sd zero, 24(sp) +; LMULMAX1-NEXT: sd zero, 16(sp) +; LMULMAX1-NEXT: sd zero, 8(sp) +; LMULMAX1-NEXT: sd zero, 0(sp) ; LMULMAX1-NEXT: li a0, 8 ; LMULMAX1-NEXT: sd a0, 128(sp) -; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; LMULMAX1-NEXT: vmv.v.i v8, 0 -; LMULMAX1-NEXT: vse32.v v8, (sp) -; LMULMAX1-NEXT: addi a0, sp, 112 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 96 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 80 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 64 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 48 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 32 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: addi a0, sp, 16 -; LMULMAX1-NEXT: vse32.v v8, (a0) -; LMULMAX1-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; LMULMAX1-NEXT: vmv.v.i v9, 0 -; LMULMAX1-NEXT: vmerge.vim v9, v9, 1, v0 +; LMULMAX1-NEXT: vmerge.vim v8, v8, 1, v0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmv.v.i v10, 0 +; LMULMAX1-NEXT: vmv.v.i v9, 0 ; LMULMAX1-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; LMULMAX1-NEXT: vslideup.vi v10, v9, 0 +; LMULMAX1-NEXT: vslideup.vi v9, v8, 0 ; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; LMULMAX1-NEXT: vmsne.vi v9, v10, 0 +; LMULMAX1-NEXT: vmsne.vi v8, v9, 0 ; LMULMAX1-NEXT: addi a0, sp, 136 +; LMULMAX1-NEXT: vsm.v v8, (a0) +; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-NEXT: vmv.v.i v8, 0 ; LMULMAX1-NEXT: li a5, 5 ; LMULMAX1-NEXT: li a6, 6 ; LMULMAX1-NEXT: li a7, 7 -; LMULMAX1-NEXT: vsm.v v9, (a0) ; LMULMAX1-NEXT: li a0, 0 ; LMULMAX1-NEXT: li a1, 0 ; LMULMAX1-NEXT: li a2, 0 ; LMULMAX1-NEXT: li a3, 0 ; LMULMAX1-NEXT: li a4, 0 -; LMULMAX1-NEXT: vmv1r.v v9, v8 -; LMULMAX1-NEXT: vmv1r.v v10, v8 -; LMULMAX1-NEXT: vmv1r.v v11, v8 -; LMULMAX1-NEXT: vmv1r.v v12, v8 -; LMULMAX1-NEXT: vmv1r.v v13, v8 -; LMULMAX1-NEXT: vmv1r.v v14, v8 -; LMULMAX1-NEXT: vmv1r.v v15, v8 -; LMULMAX1-NEXT: vmv1r.v v16, v8 -; LMULMAX1-NEXT: vmv1r.v v17, v8 -; LMULMAX1-NEXT: vmv1r.v v18, v8 -; LMULMAX1-NEXT: vmv1r.v v19, v8 -; LMULMAX1-NEXT: vmv1r.v v20, v8 -; LMULMAX1-NEXT: vmv1r.v v21, v8 -; LMULMAX1-NEXT: vmv1r.v v22, v8 -; LMULMAX1-NEXT: vmv1r.v v23, v8 +; LMULMAX1-NEXT: vmv.v.i v9, 0 +; LMULMAX1-NEXT: vmv.v.i v10, 0 +; LMULMAX1-NEXT: vmv.v.i v11, 0 +; LMULMAX1-NEXT: vmv.v.i v12, 0 +; LMULMAX1-NEXT: vmv.v.i v13, 0 +; LMULMAX1-NEXT: vmv.v.i v14, 0 +; LMULMAX1-NEXT: vmv.v.i v15, 0 +; LMULMAX1-NEXT: vmv.v.i v16, 0 +; LMULMAX1-NEXT: vmv.v.i v17, 0 +; LMULMAX1-NEXT: vmv.v.i v18, 0 +; LMULMAX1-NEXT: vmv.v.i v19, 0 +; LMULMAX1-NEXT: vmv.v.i v20, 0 +; LMULMAX1-NEXT: vmv.v.i v21, 0 +; LMULMAX1-NEXT: vmv.v.i v22, 0 +; LMULMAX1-NEXT: vmv.v.i v23, 0 ; LMULMAX1-NEXT: call vector_mask_arg_via_stack@plt ; LMULMAX1-NEXT: ld ra, 152(sp) # 8-byte Folded Reload ; LMULMAX1-NEXT: addi sp, sp, 160 Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll @@ -110,12 +110,18 @@ } define void @splat_zero_v8f16(ptr %x) { -; CHECK-LABEL: splat_zero_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; CHECK-RV32-LABEL: splat_zero_v8f16: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v8, 0 +; CHECK-RV32-NEXT: vse16.v v8, (a0) +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: splat_zero_v8f16: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: sd zero, 8(a0) +; CHECK-RV64-NEXT: sd zero, 0(a0) +; CHECK-RV64-NEXT: ret %a = insertelement <8 x half> poison, half 0.0, i32 0 %b = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> zeroinitializer store <8 x half> %b, ptr %x @@ -123,12 +129,18 @@ } define void @splat_zero_v4f32(ptr %x) { -; CHECK-LABEL: splat_zero_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; CHECK-RV32-LABEL: splat_zero_v4f32: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v8, 0 +; CHECK-RV32-NEXT: vse32.v v8, (a0) +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: splat_zero_v4f32: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: sd zero, 8(a0) +; CHECK-RV64-NEXT: sd zero, 0(a0) +; CHECK-RV64-NEXT: ret %a = insertelement <4 x float> poison, float 0.0, i32 0 %b = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> zeroinitializer store <4 x float> %b, ptr %x @@ -136,12 +148,18 @@ } define void @splat_zero_v2f64(ptr %x) { -; CHECK-LABEL: splat_zero_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse64.v v8, (a0) -; CHECK-NEXT: ret +; CHECK-RV32-LABEL: splat_zero_v2f64: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v8, 0 +; CHECK-RV32-NEXT: vse64.v v8, (a0) +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: splat_zero_v2f64: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: sd zero, 8(a0) +; CHECK-RV64-NEXT: sd zero, 0(a0) +; CHECK-RV64-NEXT: ret %a = insertelement <2 x double> poison, double 0.0, i32 0 %b = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> zeroinitializer store <2 x double> %b, ptr %x Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -269,12 +269,44 @@ } define void @splat_zero_v16i8(ptr %x) { -; CHECK-LABEL: splat_zero_v16i8: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse8.v v8, (a0) -; CHECK-NEXT: ret +; LMULMAX8-RV32-LABEL: splat_zero_v16i8: +; LMULMAX8-RV32: # %bb.0: +; LMULMAX8-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX8-RV32-NEXT: vse8.v v8, (a0) +; LMULMAX8-RV32-NEXT: ret +; +; LMULMAX2-RV32-LABEL: splat_zero_v16i8: +; LMULMAX2-RV32: # %bb.0: +; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) +; LMULMAX2-RV32-NEXT: ret +; +; LMULMAX1-RV32-LABEL: splat_zero_v16i8: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX8-RV64-LABEL: splat_zero_v16i8: +; LMULMAX8-RV64: # %bb.0: +; LMULMAX8-RV64-NEXT: sd zero, 8(a0) +; LMULMAX8-RV64-NEXT: sd zero, 0(a0) +; LMULMAX8-RV64-NEXT: ret +; +; LMULMAX2-RV64-LABEL: splat_zero_v16i8: +; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: sd zero, 8(a0) +; LMULMAX2-RV64-NEXT: sd zero, 0(a0) +; LMULMAX2-RV64-NEXT: ret +; +; LMULMAX1-RV64-LABEL: splat_zero_v16i8: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: sd zero, 8(a0) +; LMULMAX1-RV64-NEXT: sd zero, 0(a0) +; LMULMAX1-RV64-NEXT: ret %a = insertelement <16 x i8> poison, i8 0, i32 0 %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer store <16 x i8> %b, ptr %x @@ -282,12 +314,44 @@ } define void @splat_zero_v8i16(ptr %x) { -; CHECK-LABEL: splat_zero_v8i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; LMULMAX8-RV32-LABEL: splat_zero_v8i16: +; LMULMAX8-RV32: # %bb.0: +; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX8-RV32-NEXT: vse16.v v8, (a0) +; LMULMAX8-RV32-NEXT: ret +; +; LMULMAX2-RV32-LABEL: splat_zero_v8i16: +; LMULMAX2-RV32: # %bb.0: +; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) +; LMULMAX2-RV32-NEXT: ret +; +; LMULMAX1-RV32-LABEL: splat_zero_v8i16: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX8-RV64-LABEL: splat_zero_v8i16: +; LMULMAX8-RV64: # %bb.0: +; LMULMAX8-RV64-NEXT: sd zero, 8(a0) +; LMULMAX8-RV64-NEXT: sd zero, 0(a0) +; LMULMAX8-RV64-NEXT: ret +; +; LMULMAX2-RV64-LABEL: splat_zero_v8i16: +; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: sd zero, 8(a0) +; LMULMAX2-RV64-NEXT: sd zero, 0(a0) +; LMULMAX2-RV64-NEXT: ret +; +; LMULMAX1-RV64-LABEL: splat_zero_v8i16: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: sd zero, 8(a0) +; LMULMAX1-RV64-NEXT: sd zero, 0(a0) +; LMULMAX1-RV64-NEXT: ret %a = insertelement <8 x i16> poison, i16 0, i32 0 %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer store <8 x i16> %b, ptr %x @@ -295,12 +359,44 @@ } define void @splat_zero_v4i32(ptr %x) { -; CHECK-LABEL: splat_zero_v4i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; LMULMAX8-RV32-LABEL: splat_zero_v4i32: +; LMULMAX8-RV32: # %bb.0: +; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX8-RV32-NEXT: vse32.v v8, (a0) +; LMULMAX8-RV32-NEXT: ret +; +; LMULMAX2-RV32-LABEL: splat_zero_v4i32: +; LMULMAX2-RV32: # %bb.0: +; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) +; LMULMAX2-RV32-NEXT: ret +; +; LMULMAX1-RV32-LABEL: splat_zero_v4i32: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX8-RV64-LABEL: splat_zero_v4i32: +; LMULMAX8-RV64: # %bb.0: +; LMULMAX8-RV64-NEXT: sd zero, 8(a0) +; LMULMAX8-RV64-NEXT: sd zero, 0(a0) +; LMULMAX8-RV64-NEXT: ret +; +; LMULMAX2-RV64-LABEL: splat_zero_v4i32: +; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: sd zero, 8(a0) +; LMULMAX2-RV64-NEXT: sd zero, 0(a0) +; LMULMAX2-RV64-NEXT: ret +; +; LMULMAX1-RV64-LABEL: splat_zero_v4i32: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: sd zero, 8(a0) +; LMULMAX1-RV64-NEXT: sd zero, 0(a0) +; LMULMAX1-RV64-NEXT: ret %a = insertelement <4 x i32> poison, i32 0, i32 0 %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer store <4 x i32> %b, ptr %x @@ -308,12 +404,44 @@ } define void @splat_zero_v2i64(ptr %x) { -; CHECK-LABEL: splat_zero_v2i64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse64.v v8, (a0) -; CHECK-NEXT: ret +; LMULMAX8-RV32-LABEL: splat_zero_v2i64: +; LMULMAX8-RV32: # %bb.0: +; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) +; LMULMAX8-RV32-NEXT: ret +; +; LMULMAX2-RV32-LABEL: splat_zero_v2i64: +; LMULMAX2-RV32: # %bb.0: +; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) +; LMULMAX2-RV32-NEXT: ret +; +; LMULMAX1-RV32-LABEL: splat_zero_v2i64: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0 +; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX8-RV64-LABEL: splat_zero_v2i64: +; LMULMAX8-RV64: # %bb.0: +; LMULMAX8-RV64-NEXT: sd zero, 8(a0) +; LMULMAX8-RV64-NEXT: sd zero, 0(a0) +; LMULMAX8-RV64-NEXT: ret +; +; LMULMAX2-RV64-LABEL: splat_zero_v2i64: +; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: sd zero, 8(a0) +; LMULMAX2-RV64-NEXT: sd zero, 0(a0) +; LMULMAX2-RV64-NEXT: ret +; +; LMULMAX1-RV64-LABEL: splat_zero_v2i64: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: sd zero, 8(a0) +; LMULMAX1-RV64-NEXT: sd zero, 0(a0) +; LMULMAX1-RV64-NEXT: ret %a = insertelement <2 x i64> poison, i64 0, i32 0 %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer store <2 x i64> %b, ptr %x @@ -450,9 +578,7 @@ define void @splat_zero_v2i16(ptr %p) { ; CHECK-LABEL: splat_zero_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: sw zero, 0(a0) ; CHECK-NEXT: ret store <2 x i16> zeroinitializer, ptr %p ret void @@ -471,23 +597,75 @@ } define void @splat_zero_v4i16(ptr %p) { -; CHECK-LABEL: splat_zero_v4i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; LMULMAX8-RV32-LABEL: splat_zero_v4i16: +; LMULMAX8-RV32: # %bb.0: +; LMULMAX8-RV32-NEXT: sw zero, 4(a0) +; LMULMAX8-RV32-NEXT: sw zero, 0(a0) +; LMULMAX8-RV32-NEXT: ret +; +; LMULMAX2-RV32-LABEL: splat_zero_v4i16: +; LMULMAX2-RV32: # %bb.0: +; LMULMAX2-RV32-NEXT: sw zero, 4(a0) +; LMULMAX2-RV32-NEXT: sw zero, 0(a0) +; LMULMAX2-RV32-NEXT: ret +; +; LMULMAX1-RV32-LABEL: splat_zero_v4i16: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: sw zero, 4(a0) +; LMULMAX1-RV32-NEXT: sw zero, 0(a0) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX8-RV64-LABEL: splat_zero_v4i16: +; LMULMAX8-RV64: # %bb.0: +; LMULMAX8-RV64-NEXT: sd zero, 0(a0) +; LMULMAX8-RV64-NEXT: ret +; +; LMULMAX2-RV64-LABEL: splat_zero_v4i16: +; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: sd zero, 0(a0) +; LMULMAX2-RV64-NEXT: ret +; +; LMULMAX1-RV64-LABEL: splat_zero_v4i16: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: sd zero, 0(a0) +; LMULMAX1-RV64-NEXT: ret store <4 x i16> zeroinitializer, ptr %p ret void } define void @splat_zero_v2i32(ptr %p) { -; CHECK-LABEL: splat_zero_v2i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; LMULMAX8-RV32-LABEL: splat_zero_v2i32: +; LMULMAX8-RV32: # %bb.0: +; LMULMAX8-RV32-NEXT: sw zero, 4(a0) +; LMULMAX8-RV32-NEXT: sw zero, 0(a0) +; LMULMAX8-RV32-NEXT: ret +; +; LMULMAX2-RV32-LABEL: splat_zero_v2i32: +; LMULMAX2-RV32: # %bb.0: +; LMULMAX2-RV32-NEXT: sw zero, 4(a0) +; LMULMAX2-RV32-NEXT: sw zero, 0(a0) +; LMULMAX2-RV32-NEXT: ret +; +; LMULMAX1-RV32-LABEL: splat_zero_v2i32: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: sw zero, 4(a0) +; LMULMAX1-RV32-NEXT: sw zero, 0(a0) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX8-RV64-LABEL: splat_zero_v2i32: +; LMULMAX8-RV64: # %bb.0: +; LMULMAX8-RV64-NEXT: sd zero, 0(a0) +; LMULMAX8-RV64-NEXT: ret +; +; LMULMAX2-RV64-LABEL: splat_zero_v2i32: +; LMULMAX2-RV64: # %bb.0: +; LMULMAX2-RV64-NEXT: sd zero, 0(a0) +; LMULMAX2-RV64-NEXT: ret +; +; LMULMAX1-RV64-LABEL: splat_zero_v2i32: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: sd zero, 0(a0) +; LMULMAX1-RV64-NEXT: ret store <2 x i32> zeroinitializer, ptr %p ret void } Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -26,17 +26,7 @@ define void @splat_zeros_v2i1(ptr %x) { ; CHECK-LABEL: splat_zeros_v2i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmclr.m v0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v9, 0 -; CHECK-NEXT: vsm.v v8, (a0) +; CHECK-NEXT: sb zero, 0(a0) ; CHECK-NEXT: ret store <2 x i1> zeroinitializer, ptr %x ret void @@ -135,9 +125,7 @@ define void @splat_zeros_v8i1(ptr %x) { ; CHECK-LABEL: splat_zeros_v8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmclr.m v8 -; CHECK-NEXT: vsm.v v8, (a0) +; CHECK-NEXT: sb zero, 0(a0) ; CHECK-NEXT: ret store <8 x i1> zeroinitializer, ptr %x ret void @@ -185,31 +173,10 @@ } define void @splat_zeros_v32i1(ptr %x) { -; LMULMAX2-LABEL: splat_zeros_v32i1: -; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: li a1, 32 -; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-NEXT: vmclr.m v8 -; LMULMAX2-NEXT: vsm.v v8, (a0) -; LMULMAX2-NEXT: ret -; -; LMULMAX1-RV32-LABEL: splat_zeros_v32i1: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmclr.m v8 -; LMULMAX1-RV32-NEXT: vsm.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a0, a0, 2 -; LMULMAX1-RV32-NEXT: vsm.v v8, (a0) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_zeros_v32i1: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmclr.m v8 -; LMULMAX1-RV64-NEXT: vsm.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a0, a0, 2 -; LMULMAX1-RV64-NEXT: vsm.v v8, (a0) -; LMULMAX1-RV64-NEXT: ret +; CHECK-LABEL: splat_zeros_v32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: sw zero, 0(a0) +; CHECK-NEXT: ret store <32 x i1> zeroinitializer, ptr %x ret void } Index: llvm/test/CodeGen/RISCV/rvv/splats-with-mixed-vl.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/splats-with-mixed-vl.ll +++ llvm/test/CodeGen/RISCV/rvv/splats-with-mixed-vl.ll @@ -4,9 +4,8 @@ define void @constant_splat_fixed(ptr %p) { ; CHECK-LABEL: constant_splat_fixed: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: sd zero, 8(a0) +; CHECK-NEXT: sd zero, 0(a0) ; CHECK-NEXT: ret store <4 x i32> zeroinitializer, ptr %p ret void @@ -31,9 +30,8 @@ ; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: sd zero, 8(a1) +; CHECK-NEXT: sd zero, 0(a1) ; CHECK-NEXT: ret store zeroinitializer, ptr %p store <4 x i32> zeroinitializer, ptr %p2 @@ -45,9 +43,8 @@ define void @constant_splat_fixed_then_scalable(ptr %p, ptr %p2) { ; CHECK-LABEL: constant_splat_fixed_then_scalable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse32.v v8, (a1) +; CHECK-NEXT: sd zero, 8(a1) +; CHECK-NEXT: sd zero, 0(a1) ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vse32.v v8, (a0) @@ -90,10 +87,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vse32.v v9, (a0) -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: sd zero, 8(a0) +; CHECK-NEXT: sd zero, 0(a0) ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %v, i32 0 @@ -110,9 +105,8 @@ ; CHECK-NEXT: vsetvli a2, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: sd zero, 8(a0) +; CHECK-NEXT: sd zero, 0(a0) ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %v, i32 0 %splat = shufflevector %elt.head, poison, zeroinitializer