Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12170,7 +12170,33 @@
     break;
 
     auto *Store = cast<StoreSDNode>(N);
+    EVT MemVT = Store->getMemoryVT();
     SDValue Val = Store->getValue();
+
+    // Using vector to store zeros requires e.g.:
+    //   vsetivli   zero, 2, e64, m1, ta, ma
+    //   vmv.v.i    v8, 0
+    //   vse64.v    v8, (a0)
+    // If sufficiently aligned, we can use at most one scalar store to zero
+    // initialize any power-of-two size up to XLen bits.
+    if (DCI.isBeforeLegalize() && !Store->isTruncatingStore() &&
+        !Store->isIndexed() && ISD::isBuildVectorAllZeros(Val.getNode()) &&
+        MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
+        isPowerOf2_64(MemVT.getSizeInBits()) &&
+        MemVT.getSizeInBits() <= Subtarget.getXLen()) {
+      assert(!MemVT.isScalableVector());
+      auto NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
+      if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                         NewVT, *Store->getMemOperand())) {
+        SDLoc DL(N);
+        SDValue Chain = Store->getChain();
+        auto NewV = DAG.getConstant(0, DL, NewVT);
+        return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
+                            Store->getPointerInfo(), Store->getOriginalAlign(),
+                            Store->getMemOperand()->getFlags());
+      }
+    }
+
     // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
     // vfmv.f.s is represented as extract element from 0. Match it late to avoid
     // any illegal types.
@@ -12180,7 +12206,6 @@
         isNullConstant(Val.getOperand(1)))) {
       SDValue Src = Val.getOperand(0);
       MVT VecVT = Src.getSimpleValueType();
-      EVT MemVT = Store->getMemoryVT();
       // VecVT should be scalable and memory VT should match the element type.
       if (VecVT.isScalableVector() && MemVT == VecVT.getVectorElementType()) {
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
@@ -450,9 +450,7 @@
 define void @splat_zero_v2i16(ptr %p) {
 ; CHECK-LABEL: splat_zero_v2i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    sw zero, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i16> zeroinitializer, ptr %p
   ret void
@@ -471,23 +469,81 @@
 }
 
 define void @splat_zero_v4i16(ptr %p) {
-; CHECK-LABEL: splat_zero_v4i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vse16.v v8, (a0)
-; CHECK-NEXT:    ret
+; LMULMAX8-RV32-LABEL: splat_zero_v4i16:
+; LMULMAX8-RV32:       # %bb.0:
+; LMULMAX8-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
+; LMULMAX8-RV32-NEXT:    vse16.v v8, (a0)
+; LMULMAX8-RV32-NEXT:    ret
+;
+; LMULMAX2-RV32-LABEL: splat_zero_v4i16:
+; LMULMAX2-RV32:       # %bb.0:
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.i v8, 0
+; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
+; LMULMAX2-RV32-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: splat_zero_v4i16:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
+; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX8-RV64-LABEL: splat_zero_v4i16:
+; LMULMAX8-RV64:       # %bb.0:
+; LMULMAX8-RV64-NEXT:    sd zero, 0(a0)
+; LMULMAX8-RV64-NEXT:    ret
+;
+; LMULMAX2-RV64-LABEL: splat_zero_v4i16:
+; LMULMAX2-RV64:       # %bb.0:
+; LMULMAX2-RV64-NEXT:    sd zero, 0(a0)
+; LMULMAX2-RV64-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: splat_zero_v4i16:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    sd zero, 0(a0)
+; LMULMAX1-RV64-NEXT:    ret
   store <4 x i16> zeroinitializer, ptr %p
   ret void
 }
 
 define void @splat_zero_v2i32(ptr %p) {
-; CHECK-LABEL: splat_zero_v2i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
+; LMULMAX8-RV32-LABEL: splat_zero_v2i32:
+; LMULMAX8-RV32:       # %bb.0:
+; LMULMAX8-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; LMULMAX8-RV32-NEXT:    vmv.v.i v8, 0
+; LMULMAX8-RV32-NEXT:    vse32.v v8, (a0)
+; LMULMAX8-RV32-NEXT:    ret
+;
+; LMULMAX2-RV32-LABEL: splat_zero_v2i32:
+; LMULMAX2-RV32:       # %bb.0:
+; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; LMULMAX2-RV32-NEXT:    vmv.v.i v8, 0
+; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
+; LMULMAX2-RV32-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: splat_zero_v2i32:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; LMULMAX1-RV32-NEXT:    vmv.v.i v8, 0
+; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX8-RV64-LABEL: splat_zero_v2i32:
+; LMULMAX8-RV64:       # %bb.0:
+; LMULMAX8-RV64-NEXT:    sd zero, 0(a0)
+; LMULMAX8-RV64-NEXT:    ret
+;
+; LMULMAX2-RV64-LABEL: splat_zero_v2i32:
+; LMULMAX2-RV64:       # %bb.0:
+; LMULMAX2-RV64-NEXT:    sd zero, 0(a0)
+; LMULMAX2-RV64-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: splat_zero_v2i32:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    sd zero, 0(a0)
+; LMULMAX1-RV64-NEXT:    ret
   store <2 x i32> zeroinitializer, ptr %p
   ret void
 }
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -26,17 +26,7 @@
 define void @splat_zeros_v2i1(ptr %x) {
 ; CHECK-LABEL: splat_zeros_v2i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmclr.m v0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v9, 0
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    sb zero, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i1> zeroinitializer, ptr %x
   ret void
@@ -135,9 +125,7 @@
 define void @splat_zeros_v8i1(ptr %x) {
 ; CHECK-LABEL: splat_zeros_v8i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmclr.m v8
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    sb zero, 0(a0)
 ; CHECK-NEXT:    ret
   store <8 x i1> zeroinitializer, ptr %x
   ret void
@@ -185,31 +173,10 @@
 }
 
 define void @splat_zeros_v32i1(ptr %x) {
-; LMULMAX2-LABEL: splat_zeros_v32i1:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    li a1, 32
-; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmclr.m v8
-; LMULMAX2-NEXT:    vsm.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
-; LMULMAX1-RV32-LABEL: splat_zeros_v32i1:
-; LMULMAX1-RV32:       # %bb.0:
-; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vmclr.m v8
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    addi a0, a0, 2
-; LMULMAX1-RV32-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    ret
-;
-; LMULMAX1-RV64-LABEL: splat_zeros_v32i1:
-; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmclr.m v8
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 2
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    ret
+; CHECK-LABEL: splat_zeros_v32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    sw zero, 0(a0)
+; CHECK-NEXT:    ret
   store <32 x i1> zeroinitializer, ptr %x
   ret void
 }
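Note (reviewer illustration, not part of the patch): the XLen bound in the new combine means the same IR can lower differently per target, which the splat_zero_v4i16 checks above demonstrate. A minimal standalone reproducer, using a hypothetical function name, would be:

; Illustrative only: a 64-bit all-zero store. Per the checks above it becomes a
; single "sd zero, 0(a0)" on RV64 (64 bits <= XLen), but keeps the
; vsetivli/vmv.v.i/vse16.v sequence on RV32, where 64 bits exceeds XLen.
define void @zero_store_64bit(ptr %p) {
  store <4 x i16> zeroinitializer, ptr %p
  ret void
}

The 32-bit and smaller cases (<2 x i16>, <2 x i1>, <8 x i1>, <32 x i1>) fit within XLen on both RV32 and RV64, which is why those tests now check a plain sw or sb under the shared CHECK prefix.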