diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12199,6 +12199,34 @@
       }
     }
 
+    // If sufficiently aligned, we can scalarize small constants that
+    // fit into i32.
+    // TODO: Potentially look at extending this to i64 if constant
+    // materialization isn't too expensive.
+    if (DCI.isBeforeLegalize() && IsScalarizable &&
+        MemVT.getSizeInBits() <= 32 &&
+        ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
+      MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
+      APInt SplatVal, SplatUndef;
+      unsigned SplatBitSize;
+      bool HasAnyUndefs;
+      // isConstantSplat doesn't work on elements smaller than i8, hence the
+      // bitsGE(MVT::i8) check below. Set MinSplatBits to the size of the
+      // scalar to get a single splat value covering the whole store.
+      if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+                                         NewVT, *Store->getMemOperand()) &&
+          MemVT.getVectorElementType().bitsGE(MVT::i8) &&
+          cast<BuildVectorSDNode>(Val)->isConstantSplat(
+              SplatVal, SplatUndef, SplatBitSize, HasAnyUndefs,
+              NewVT.getSizeInBits())) {
+        assert(SplatBitSize == NewVT.getSizeInBits());
+        SDValue NewV = DAG.getConstant(SplatVal, DL, NewVT);
+        return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
+                            Store->getPointerInfo(), Store->getOriginalAlign(),
+                            Store->getMemOperand()->getFlags());
+      }
+    }
+
     // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
     //   vsetivli   zero, 2, e16, m1, ta, ma
     //   vle16.v    v8, (a0)
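As a reading aid for the combine above (an illustrative sketch only; the function name and constant are made up and do not come from the patch or its tests), a qualifying store looks like this before the combine runs:

define void @example_store_v4i8(ptr %p) {
  ; Assuming the store is sufficiently aligned and the subtarget has vector
  ; support, the combine packs the four i8 lanes little-endian into the i32
  ; constant 0x04030201, which is then materialized with lui+addi (addiw on
  ; RV64) and written with a single sw instead of a vsetivli/vse8.v sequence.
  store <4 x i8> <i8 1, i8 2, i8 3, i8 4>, ptr %p
  ret void
}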
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -64,16 +64,27 @@
 }

 define void @buildvec_vid_step2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
-; CHECK-LABEL: buildvec_vid_step2_add0_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
+; RV32-LABEL: buildvec_vid_step2_add0_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a4, 24640
+; RV32-NEXT:    addi a5, a4, 512
+; RV32-NEXT:    sw a5, 0(a0)
+; RV32-NEXT:    sw a5, 0(a1)
+; RV32-NEXT:    sw a4, 0(a2)
+; RV32-NEXT:    lui a0, 24576
+; RV32-NEXT:    sw a0, 0(a3)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_vid_step2_add0_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a4, 24640
+; RV64-NEXT:    addiw a5, a4, 512
+; RV64-NEXT:    sw a5, 0(a0)
+; RV64-NEXT:    sw a5, 0(a1)
+; RV64-NEXT:    sw a4, 0(a2)
+; RV64-NEXT:    lui a0, 24576
+; RV64-NEXT:    sw a0, 0(a3)
+; RV64-NEXT:    ret
   store <4 x i8> , ptr %z0
   store <4 x i8> , ptr %z1
   store <4 x i8> , ptr %z2
@@ -82,17 +93,31 @@
 }

 define void @buildvec_vid_step2_add1_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
-; CHECK-LABEL: buildvec_vid_step2_add1_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vadd.vi v8, v8, 1
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
+; RV32-LABEL: buildvec_vid_step2_add1_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a4, 28752
+; RV32-NEXT:    addi a5, a4, 769
+; RV32-NEXT:    sw a5, 0(a0)
+; RV32-NEXT:    addi a0, a4, 768
+; RV32-NEXT:    sw a0, 0(a1)
+; RV32-NEXT:    sw a4, 0(a2)
+; RV32-NEXT:    lui a0, 28672
+; RV32-NEXT:    addi a0, a0, 1
+; RV32-NEXT:    sw a0, 0(a3)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_vid_step2_add1_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a4, 28752
+; RV64-NEXT:    addiw a5, a4, 769
+; RV64-NEXT:    sw a5, 0(a0)
+; RV64-NEXT:    addiw a0, a4, 768
+; RV64-NEXT:    sw a0, 0(a1)
+; RV64-NEXT:    sw a4, 0(a2)
+; RV64-NEXT:    lui a0, 28672
+; RV64-NEXT:    addiw a0, a0, 1
+; RV64-NEXT:    sw a0, 0(a3)
+; RV64-NEXT:    ret
   store <4 x i8> , ptr %z0
   store <4 x i8> , ptr %z1
   store <4 x i8> , ptr %z2
@@ -104,16 +129,29 @@
 ; while lowering ISD::BUILD_VECTOR is custom-lowered to RISCVISD::MUL_VL before
 ; being combined.
 define void @buildvec_vid_stepn1_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
-; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
+; RV32-LABEL: buildvec_vid_stepn1_add0_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a4, 1040368
+; RV32-NEXT:    addi a4, a4, -256
+; RV32-NEXT:    sw a4, 0(a0)
+; RV32-NEXT:    sw a4, 0(a1)
+; RV32-NEXT:    lui a0, 1040352
+; RV32-NEXT:    sw a0, 0(a2)
+; RV32-NEXT:    lui a0, 1036288
+; RV32-NEXT:    sw a0, 0(a3)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_vid_stepn1_add0_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a4, 1040368
+; RV64-NEXT:    addiw a4, a4, -256
+; RV64-NEXT:    sw a4, 0(a0)
+; RV64-NEXT:    sw a4, 0(a1)
+; RV64-NEXT:    lui a0, 1040352
+; RV64-NEXT:    sw a0, 0(a2)
+; RV64-NEXT:    lui a0, 1036288
+; RV64-NEXT:    sw a0, 0(a3)
+; RV64-NEXT:    ret
   store <4 x i8> , ptr %z0
   store <4 x i8> , ptr %z1
   store <4 x i8> , ptr %z2
@@ -122,17 +160,29 @@
 }

 define void @buildvec_vid_stepn2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
-; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
+; RV32-LABEL: buildvec_vid_stepn2_add0_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a4, 1028048
+; RV32-NEXT:    addi a4, a4, -512
+; RV32-NEXT:    sw a4, 0(a0)
+; RV32-NEXT:    sw a4, 0(a1)
+; RV32-NEXT:    lui a0, 1028032
+; RV32-NEXT:    sw a0, 0(a2)
+; RV32-NEXT:    lui a0, 1024000
+; RV32-NEXT:    sw a0, 0(a3)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_vid_stepn2_add0_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a4, 1028048
+; RV64-NEXT:    addiw a4, a4, -512
+; RV64-NEXT:    sw a4, 0(a0)
+; RV64-NEXT:    sw a4, 0(a1)
+; RV64-NEXT:    lui a0, 1028032
+; RV64-NEXT:    sw a0, 0(a2)
+; RV64-NEXT:    lui a0, 1024000
+; RV64-NEXT:    sw a0, 0(a3)
+; RV64-NEXT:    ret
   store <4 x i8> , ptr %z0
   store <4 x i8> , ptr %z1
   store <4 x i8> , ptr %z2
@@ -141,28 +191,37 @@
 }

 define void @buildvec_vid_stepn2_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
-; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vrsub.vi v8, v8, 3
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: buildvec_vid_stepn2_add3_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 1040368
+; RV32-NEXT:    addi a1, a1, 259
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_vid_stepn2_add3_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 1040368
+; RV64-NEXT:    addiw a1, a1, 259
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <4 x i8> , ptr %z0
   ret void
 }

 define void @buildvec_vid_stepn3_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
-; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, -3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse8.v v9, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: buildvec_vid_stepn3_add3_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 1028048
+; RV32-NEXT:    addi a1, a1, 3
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_vid_stepn3_add3_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 1028048
+; RV64-NEXT:    addiw a1, a1, 3
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <4 x i8> , ptr %z0
   ret void
 }
@@ -236,35 +295,45 @@
 }

 define void @buildvec_no_vid_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5) {
-; CHECK-LABEL: buildvec_no_vid_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a6, %hi(.LCPI14_0)
-; CHECK-NEXT:    addi a6, a6, %lo(.LCPI14_0)
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a6)
-; CHECK-NEXT:    lui a6, %hi(.LCPI14_1)
-; CHECK-NEXT:    addi a6, a6, %lo(.LCPI14_1)
-; CHECK-NEXT:    vle8.v v9, (a6)
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v9, (a1)
-; CHECK-NEXT:    li a0, 1
-; CHECK-NEXT:    slli a0, a0, 11
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    li a0, 2047
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    lui a0, %hi(.LCPI14_2)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI14_2)
-; CHECK-NEXT:    vle8.v v9, (a0)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    vmv.v.i v8, -2
-; CHECK-NEXT:    vse8.v v8, (a4)
-; CHECK-NEXT:    vse8.v v9, (a5)
-; CHECK-NEXT:    ret
+; RV32-LABEL: buildvec_no_vid_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a6, 28768
+; RV32-NEXT:    addi a6, a6, 769
+; RV32-NEXT:    sw a6, 0(a0)
+; RV32-NEXT:    lui a0, 28752
+; RV32-NEXT:    addi a0, a0, 512
+; RV32-NEXT:    sw a0, 0(a1)
+; RV32-NEXT:    lui a0, 32768
+; RV32-NEXT:    sw a0, 0(a2)
+; RV32-NEXT:    lui a0, 28672
+; RV32-NEXT:    addi a0, a0, 255
+; RV32-NEXT:    sw a0, 0(a3)
+; RV32-NEXT:    li a0, 254
+; RV32-NEXT:    sw a0, 0(a4)
+; RV32-NEXT:    lui a0, 1032144
+; RV32-NEXT:    addi a0, a0, -257
+; RV32-NEXT:    sw a0, 0(a5)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: buildvec_no_vid_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a6, 28768
+; RV64-NEXT:    addiw a6, a6, 769
+; RV64-NEXT:    sw a6, 0(a0)
+; RV64-NEXT:    lui a0, 28752
+; RV64-NEXT:    addiw a0, a0, 512
+; RV64-NEXT:    sw a0, 0(a1)
+; RV64-NEXT:    lui a0, 32768
+; RV64-NEXT:    sw a0, 0(a2)
+; RV64-NEXT:    lui a0, 28672
+; RV64-NEXT:    addiw a0, a0, 255
+; RV64-NEXT:    sw a0, 0(a3)
+; RV64-NEXT:    li a0, 254
+; RV64-NEXT:    sw a0, 0(a4)
+; RV64-NEXT:    lui a0, 1032144
+; RV64-NEXT:    addiw a0, a0, -257
+; RV64-NEXT:    sw a0, 0(a5)
+; RV64-NEXT:    ret
   store <4 x i8> , ptr %z0
   store <4 x i8> , ptr %z1
   store <4 x i8> , ptr %z2
@@ -311,9 +380,8 @@
 define void @buildvec_dominant1_v2i8(ptr %x) {
 ; CHECK-LABEL: buildvec_dominant1_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, -1
-; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    li a1, -256
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> , ptr %x
   ret void
@@ -322,10 +390,8 @@
 define void @buildvec_dominant2_v2i8(ptr %x) {
 ; CHECK-LABEL: buildvec_dominant2_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    li a1, -256
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> , ptr %x
   ret void
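A note on the check-prefix split in the tests above and below: the RV32 and RV64 outputs differ only in how the packed 32-bit immediate is materialized. RV32 uses lui+addi, while RV64 uses lui+addiw so the intermediate result stays sign-extended to 64 bits; the low 32 bits written by sw are identical on both targets, which is why functions whose output needs no addi/addiw at all keep a common CHECK prefix.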
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -191,27 +191,27 @@
 define void @store_constant_v2i8(ptr %p) {
 ; CHECK-LABEL: store_constant_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, 3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse8.v v9, (a0)
+; CHECK-NEXT:    li a1, 1539
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> , ptr %p
   ret void
 }

 define void @store_constant_v2i16(ptr %p) {
-; CHECK-LABEL: store_constant_v2i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, 3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse16.v v9, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_constant_v2i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 96
+; RV32-NEXT:    addi a1, a1, 3
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_constant_v2i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 96
+; RV64-NEXT:    addiw a1, a1, 3
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <2 x i16> , ptr %p
   ret void
 }
@@ -231,14 +231,19 @@
 }

 define void @store_constant_v4i8(ptr %p) {
-; CHECK-LABEL: store_constant_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI12_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI12_0)
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_constant_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 4176
+; RV32-NEXT:    addi a1, a1, 1539
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_constant_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 4176
+; RV64-NEXT:    addiw a1, a1, 1539
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <4 x i8> , ptr %p
   ret void
 }
@@ -270,12 +275,19 @@
 }

 define void @store_id_v4i8(ptr %p) {
-; CHECK-LABEL: store_id_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_id_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 12320
+; RV32-NEXT:    addi a1, a1, 256
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_id_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 12320
+; RV64-NEXT:    addiw a1, a1, 256
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <4 x i8> , ptr %p
   ret void
 }
@@ -297,9 +309,8 @@
 define void @store_constant_splat_v2i8(ptr %p) {
 ; CHECK-LABEL: store_constant_splat_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    li a1, 771
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> , ptr %p
   ret void
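As a quick cross-check of the new immediates (a reviewer-side calculation, not produced by the test updater): element 0 of the stored vector always lands in the least significant byte of the packed scalar. The halfword 1539 in store_constant_v2i8 is 0x0603, i.e. 0x03 in byte 0 and 0x06 in byte 1, and the 771 in store_constant_splat_v2i8 is 0x0303, the same splatted byte in both lanes; similarly, lui a1, 4176 followed by addi a1, a1, 1539 in store_constant_v4i8 materializes 0x01050603, four i8 lanes packed into one word.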