diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12193,18 +12193,32 @@
         isPowerOf2_64(MemVT.getSizeInBits()) &&
         MemVT.getSizeInBits() <= Subtarget.getXLen();
 
-    // Using vector to store zeros requires e.g.:
-    //   vsetivli   zero, 2, e64, m1, ta, ma
-    //   vmv.v.i    v8, 0
+    // If sufficiently aligned we can scalarize stores of constant vectors of
+    // any power-of-two size up to XLen bits, provided that they aren't too
+    // expensive to materialize.
+    //   vsetivli   zero, 2, e8, m1, ta, ma
+    //   vmv.v.i    v8, 4
     //   vse64.v    v8, (a0)
-    // If sufficiently aligned, we can use at most one scalar store to zero
-    // initialize any power-of-two size up to XLen bits.
+    // ->
+    //   li     a1, 1028
+    //   sh     a1, 0(a0)
     if (DCI.isBeforeLegalize() && IsScalarizable &&
-        ISD::isBuildVectorAllZeros(Val.getNode())) {
-      auto NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
-      if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+        ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
+      // Get the constant vector bits
+      APInt NewC(Val.getValueSizeInBits(), 0);
+      for (unsigned i = 0; i < Val.getNumOperands(); i++) {
+        if (Val.getOperand(i).isUndef())
+          continue;
+        NewC.insertBits(Val.getConstantOperandAPInt(i),
+                        i * Val.getScalarValueSizeInBits());
+      }
+      MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
+
+      if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
+                                     Subtarget.getFeatureBits(), true) <= 2 &&
+          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                          NewVT, *Store->getMemOperand())) {
-        auto NewV = DAG.getConstant(0, DL, NewVT);
+        SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
         return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
                             Store->getPointerInfo(), Store->getOriginalAlign(),
                             Store->getMemOperand()->getFlags());
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -63,108 +63,103 @@
   ret void
 }
 
-define void @buildvec_vid_step2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+; Some tests return this struct because the stores end up being scalarized.
+%x4v4i8 = type {<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>}
+
+define %x4v4i8 @buildvec_vid_step2_add0_v4i8() {
 ; CHECK-LABEL: buildvec_vid_step2_add0_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
-  store <4 x i8> , ptr %z0
-  store <4 x i8> , ptr %z1
-  store <4 x i8> , ptr %z2
-  store <4 x i8> , ptr %z3
-  ret void
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    ret
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> , 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> , 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> , 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> , 3
+  ret %x4v4i8 %4
 }
 
-define void @buildvec_vid_step2_add1_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_step2_add1_v4i8() {
 ; CHECK-LABEL: buildvec_vid_step2_add1_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vadd.vi v8, v8, 1
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
-  store <4 x i8> , ptr %z0
-  store <4 x i8> , ptr %z1
-  store <4 x i8> , ptr %z2
-  store <4 x i8> , ptr %z3
-  ret void
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    ret
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> , 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> , 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> , 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> , 3
+  ret %x4v4i8 %4
 }
 
 ; FIXME: This could generate vrsub.vi but the (ISD::MUL X, -1) we generate
 ; while lowering ISD::BUILD_VECTOR is custom-lowered to RISCVISD::MUL_VL before
 ; being combined.
-define void @buildvec_vid_stepn1_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_stepn1_add0_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
-  store <4 x i8> , ptr %z0
-  store <4 x i8> , ptr %z1
-  store <4 x i8> , ptr %z2
-  store <4 x i8> , ptr %z3
-  ret void
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    ret
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> , 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> , 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> , 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> , 3
+  ret %x4v4i8 %4
 }
 
-define void @buildvec_vid_stepn2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_stepn2_add0_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
-  store <4 x i8> , ptr %z0
-  store <4 x i8> , ptr %z1
-  store <4 x i8> , ptr %z2
-  store <4 x i8> , ptr %z3
-  ret void
+; CHECK-NEXT:    vmv.v.i v11, -6
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    ret
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> , 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> , 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> , 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> , 3
+  ret %x4v4i8 %4
 }
 
-define void @buildvec_vid_stepn2_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define <4 x i8> @buildvec_vid_stepn2_add3_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 3
-; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
-  store <4 x i8> , ptr %z0
-  ret void
+  ret <4 x i8> 
 }
 
-define void @buildvec_vid_stepn3_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, -3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse8.v v9, (a0)
+; CHECK-NEXT:    vmv.v.i v9, 3
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    li a0, -3
+; CHECK-NEXT:    vmadd.vx v8, a0, v9
 ; CHECK-NEXT:    ret
-  store <4 x i8> , ptr %z0
-  ret void
+  ret <4 x i8> 
 }
 
 define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
@@ -235,43 +230,37 @@
   ret <4 x i64> 
 }
 
-define void @buildvec_no_vid_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5) {
+%x6v4i8 = type {<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>}
+
+define %x6v4i8 @buildvec_no_vid_v4i8() {
 ; CHECK-LABEL: buildvec_no_vid_v4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a6, %hi(.LCPI14_0)
-; CHECK-NEXT:    addi a6, a6, %lo(.LCPI14_0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI14_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a6)
-; CHECK-NEXT:    lui a6, %hi(.LCPI14_1)
-; CHECK-NEXT:    addi a6, a6, %lo(.LCPI14_1)
-; CHECK-NEXT:    vle8.v v9, (a6)
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v9, (a1)
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI14_1)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI14_1)
+; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    li a0, 1
 ; CHECK-NEXT:    slli a0, a0, 11
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vse8.v v8, (a2)
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    li a0, 2047
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vmv.v.x v11, a0
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    lui a0, %hi(.LCPI14_2)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI14_2)
-; CHECK-NEXT:    vle8.v v9, (a0)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    vmv.v.i v8, -2
-; CHECK-NEXT:    vse8.v v8, (a4)
-; CHECK-NEXT:    vse8.v v9, (a5)
-; CHECK-NEXT:    ret
-  store <4 x i8> , ptr %z0
-  store <4 x i8> , ptr %z1
-  store <4 x i8> , ptr %z2
-  store <4 x i8> , ptr %z3
-  store <4 x i8> , ptr %z4
-  store <4 x i8> , ptr %z5
-  ret void
+; CHECK-NEXT:    vle8.v v13, (a0)
+; CHECK-NEXT:    vmv.v.i v12, -2
+; CHECK-NEXT:    ret
+  %1 = insertvalue %x6v4i8 poison, <4 x i8> , 0
+  %2 = insertvalue %x6v4i8 %1, <4 x i8> , 1
+  %3 = insertvalue %x6v4i8 %2, <4 x i8> , 2
+  %4 = insertvalue %x6v4i8 %3, <4 x i8> , 3
+  %5 = insertvalue %x6v4i8 %4, <4 x i8> , 4
+  %6 = insertvalue %x6v4i8 %5, <4 x i8> , 5
+  ret %x6v4i8 %6
 }
 
 define void @buildvec_dominant0_v8i16(ptr %x) {
@@ -300,35 +289,30 @@
   ret void
 }
 
-define void @buildvec_dominant0_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant0_v2i8() {
 ; CHECK-LABEL: buildvec_dominant0_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret
-  store <2 x i8> , ptr %x
-  ret void
+  ret <2 x i8> 
 }
 
-define void @buildvec_dominant1_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant1_v2i8() {
 ; CHECK-LABEL: buildvec_dominant1_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, -1
-; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
-  store <2 x i8> , ptr %x
-  ret void
+  ret <2 x i8> 
 }
 
-define void @buildvec_dominant2_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant2_v2i8() {
 ; CHECK-LABEL: buildvec_dominant2_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
-  store <2 x i8> , ptr %x
-  ret void
+  ret <2 x i8> 
 }
 
 define void @buildvec_dominant0_v2i32(ptr %x) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -7,17 +7,8 @@
 define void @splat_ones_v1i1(ptr %x) {
 ; CHECK-LABEL: splat_ones_v1i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v9, 0
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    sb a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <1 x i1> , ptr %x
   ret void
@@ -83,17 +74,8 @@
 define void @splat_ones_v4i1(ptr %x) {
 ; CHECK-LABEL: splat_ones_v4i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v9, 0
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    li a1, 15
+; CHECK-NEXT:    sb a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <4 x i1> , ptr %x
   ret void
@@ -149,9 +131,8 @@
 define void @splat_ones_v16i1(ptr %x) {
 ; CHECK-LABEL: splat_ones_v16i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmset.m v8
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <16 x i1> , ptr %x
   ret void
@@ -220,16 +201,6 @@
 }
 
 define void @splat_ones_v64i1(ptr %x) {
-; LMULMAX2-LABEL: splat_ones_v64i1:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi a1, a0, 4
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmset.m v8
-; LMULMAX2-NEXT:    vsm.v v8, (a1)
-; LMULMAX2-NEXT:    vsm.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
 ; LMULMAX1-RV32-LABEL: splat_ones_v64i1:
 ; LMULMAX1-RV32:       # %bb.0:
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
@@ -245,15 +216,8 @@
 ;
 ; LMULMAX1-RV64-LABEL: splat_ones_v64i1:
 ; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmset.m v8
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 6
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 4
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 2
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    li a1, -1
+; LMULMAX1-RV64-NEXT:    sd a1, 0(a0)
 ; LMULMAX1-RV64-NEXT:    ret
   store <64 x i1> , ptr %x
   ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -191,27 +191,27 @@
 define void @store_constant_v2i8(ptr %p) {
 ; CHECK-LABEL: store_constant_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, 3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse8.v v9, (a0)
+; CHECK-NEXT:    li a1, 1539
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> , ptr %p
   ret void
 }
 
 define void @store_constant_v2i16(ptr %p) {
-; CHECK-LABEL: store_constant_v2i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, 3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse16.v v9, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_constant_v2i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 96
+; RV32-NEXT:    addi a1, a1, 3
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_constant_v2i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 96
+; RV64-NEXT:    addiw a1, a1, 3
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <2 x i16> , ptr %p
   ret void
 }
@@ -231,14 +231,19 @@
 }
 
 define void @store_constant_v4i8(ptr %p) {
-; CHECK-LABEL: store_constant_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI12_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI12_0)
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_constant_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 4176
+; RV32-NEXT:    addi a1, a1, 1539
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_constant_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 4176
+; RV64-NEXT:    addiw a1, a1, 1539
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <4 x i8> , ptr %p
   ret void
 }
@@ -270,12 +275,19 @@
 }
 
 define void @store_id_v4i8(ptr %p) {
-; CHECK-LABEL: store_id_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_id_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 12320
+; RV32-NEXT:    addi a1, a1, 256
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_id_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 12320
+; RV64-NEXT:    addiw a1, a1, 256
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <4 x i8> , ptr %p
   ret void
 }
@@ -297,9 +309,8 @@
 define void @store_constant_splat_v2i8(ptr %p) {
 ; CHECK-LABEL: store_constant_splat_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    li a1, 771
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> , ptr %p
   ret void
@@ -308,9 +319,8 @@
 define void @store_constant_undef_v2i8(ptr %p) {
 ; CHECK-LABEL: store_constant_undef_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    li a1, 768
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> , ptr %p
   ret void
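
Note on the constant packing (illustration only, not part of the patch): the APInt::insertBits loop in the ISel change places BUILD_VECTOR element i at bit offset i * EltBits, so the scalar store writes the elements in little-endian element order, exactly as the original vector store would. The combine also only fires when RISCVMatInt::getIntMatCost says the packed constant costs at most two instructions (e.g. lui+addi), so a short vector sequence is never traded for a long li expansion. Below is a minimal standalone C++ sketch of that packing arithmetic; packConstants is a hypothetical helper used only for this illustration.

// Standalone sketch, not LLVM code: models the effect of the insertBits loop.
#include <cstdint>
#include <cstdio>
#include <vector>

// Pack EltBits-wide constants (assumes EltBits < 64) into one integer,
// placing element i at bit offset i * EltBits.
static uint64_t packConstants(const std::vector<uint64_t> &Elts,
                              unsigned EltBits) {
  uint64_t Packed = 0;
  for (size_t I = 0; I < Elts.size(); ++I)
    Packed |= (Elts[I] & ((1ULL << EltBits) - 1)) << (I * EltBits);
  return Packed;
}

int main() {
  // Two e8 elements of value 4, as in the comment in the ISel change:
  // 0x04 | (0x04 << 8) = 0x0404 = 1028 -> "li a1, 1028; sh a1, 0(a0)".
  std::printf("%llu\n", (unsigned long long)packConstants({4, 4}, 8));
  // Bytes 0x03 and 0x06 pack to 0x0603 = 1539, matching the
  // "li a1, 1539" expected in store_constant_v2i8.
  std::printf("%llu\n", (unsigned long long)packConstants({3, 6}, 8));
  return 0;
}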