diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -509,6 +509,7 @@ SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const; SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG, int64_t ExtTrueVal) const; SDValue lowerVectorMaskTrunc(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -428,7 +428,7 @@ } for (MVT VT : BoolVecVTs) { - setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); // Mask VTs are custom-expanded into a series of standard nodes setOperationAction(ISD::TRUNCATE, VT, Custom); @@ -1358,6 +1358,15 @@ return Vec; } + // A splat can be lowered as a SETCC. For each fixed-length mask vector + // type, we have a legal equivalently-sized i8 type, so we can use that. 
+  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+    MVT InterVT = VT.changeVectorElementType(MVT::i8);
+    Splat = DAG.getSplatBuildVector(InterVT, DL, Splat);
+    SDValue Zero = DAG.getConstant(0, DL, InterVT);
+    return DAG.getSetCC(DL, VT, Splat, Zero, ISD::SETNE);
+  }
+
   return SDValue();
 }
 
@@ -1981,6 +1990,10 @@
         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
+  case ISD::SPLAT_VECTOR:
+    assert(Op.getValueType().getVectorElementType() == MVT::i1 &&
+           "Unexpected SPLAT_VECTOR lowering");
+    return lowerVectorMaskSplat(Op, DAG);
   case ISD::SPLAT_VECTOR_PARTS:
     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
   case ISD::INSERT_VECTOR_ELT:
@@ -2766,6 +2779,22 @@
   return DAG.getMergeValues(Parts, DL);
 }
 
+// Lower splats of i1 types to SETCC. For each mask vector type, we have a
+// legal equivalently-sized i8 type, so we can use that as a go-between.
+SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  SDValue SplatVal = Op.getOperand(0);
+  // All-zeros or all-ones splats are handled specially.
+  if (isa<ConstantSDNode>(SplatVal))
+    return Op;
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  MVT InterVT = VT.changeVectorElementType(MVT::i8);
+  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
+  SDValue Zero = DAG.getConstant(0, DL, InterVT);
+  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
+}
+
 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type
 // is illegal (currently only vXi64 RV32).
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splats.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splats.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splats.ll
@@ -28,6 +28,28 @@
   ret void
 }
 
+define void @splat_v1i1(<1 x i1>* %x, i1 %y) {
+; CHECK-LABEL: splat_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsetivli a1, 1, e8,m1,tu,mu
+; CHECK-NEXT:    vslideup.vi v26, v25, 0
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmsne.vi v25, v26, 0
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <1 x i1> undef, i1 %y, i32 0
+  %b = shufflevector <1 x i1> %a, <1 x i1> undef, <1 x i32> zeroinitializer
+  store <1 x i1> %b, <1 x i1>* %x
+  ret void
+}
+
 define void @splat_ones_v4i1(<4 x i1>* %x) {
 ; CHECK-LABEL: splat_ones_v4i1:
 ; CHECK:       # %bb.0:
@@ -61,6 +83,28 @@
   ret void
 }
 
+define void @splat_v4i1(<4 x i1>* %x, i1 %y) {
+; CHECK-LABEL: splat_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 4, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsetivli a1, 4, e8,m1,tu,mu
+; CHECK-NEXT:    vslideup.vi v26, v25, 0
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmsne.vi v25, v26, 0
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i1> undef, i1 %y, i32 0
+  %b = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> zeroinitializer
+  store <4 x i1> %b, <4 x i1>* %x
+  ret void
+}
+
 define void @splat_zeros_v8i1(<8 x i1>* %x) {
 ; CHECK-LABEL: splat_zeros_v8i1:
; CHECK: # %bb.0: @@ -72,6 +116,20 @@ ret void } +define void @splat_v8i1(<8 x i1>* %x, i1 %y) { +; CHECK-LABEL: splat_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 8, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vse1.v v26, (a0) +; CHECK-NEXT: ret + %a = insertelement <8 x i1> undef, i1 %y, i32 0 + %b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> zeroinitializer + store <8 x i1> %b, <8 x i1>* %x + ret void +} + define void @splat_ones_v16i1(<16 x i1>* %x) { ; CHECK-LABEL: splat_ones_v16i1: ; CHECK: # %bb.0: @@ -83,6 +141,20 @@ ret void } +define void @splat_v16i1(<16 x i1>* %x, i1 %y) { +; CHECK-LABEL: splat_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; CHECK-NEXT: vmv.v.x v25, a1 +; CHECK-NEXT: vmsne.vi v26, v25, 0 +; CHECK-NEXT: vse1.v v26, (a0) +; CHECK-NEXT: ret + %a = insertelement <16 x i1> undef, i1 %y, i32 0 + %b = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> zeroinitializer + store <16 x i1> %b, <16 x i1>* %x + ret void +} + define void @splat_zeros_v32i1(<32 x i1>* %x) { ; LMULMAX2-LABEL: splat_zeros_v32i1: ; LMULMAX2: # %bb.0: @@ -113,6 +185,41 @@ ret void } +define void @splat_v32i1(<32 x i1>* %x, i1 %y) { +; LMULMAX2-LABEL: splat_v32i1: +; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi a2, zero, 32 +; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu +; LMULMAX2-NEXT: vmv.v.x v26, a1 +; LMULMAX2-NEXT: vmsne.vi v25, v26, 0 +; LMULMAX2-NEXT: vse1.v v25, (a0) +; LMULMAX2-NEXT: ret +; +; LMULMAX1-RV32-LABEL: splat_v32i1: +; LMULMAX1-RV32: # %bb.0: +; LMULMAX1-RV32-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; LMULMAX1-RV32-NEXT: vmv.v.x v25, a1 +; LMULMAX1-RV32-NEXT: vmsne.vi v26, v25, 0 +; LMULMAX1-RV32-NEXT: addi a1, a0, 2 +; LMULMAX1-RV32-NEXT: vse1.v v26, (a1) +; LMULMAX1-RV32-NEXT: vse1.v v26, (a0) +; LMULMAX1-RV32-NEXT: ret +; +; LMULMAX1-RV64-LABEL: splat_v32i1: +; LMULMAX1-RV64: # %bb.0: +; LMULMAX1-RV64-NEXT: vsetivli a2, 16, e8,m1,ta,mu +; LMULMAX1-RV64-NEXT: 
vmv.v.x v25, a1
+; LMULMAX1-RV64-NEXT:    vmsne.vi v26, v25, 0
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 2
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+  %a = insertelement <32 x i1> undef, i1 %y, i32 0
+  %b = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> zeroinitializer
+  store <32 x i1> %b, <32 x i1>* %x
+  ret void
+}
+
 define void @splat_ones_v64i1(<64 x i1>* %x) {
 ; LMULMAX2-LABEL: splat_ones_v64i1:
 ; LMULMAX2:       # %bb.0:
@@ -152,3 +259,48 @@
   store <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <64 x i1>* %x
   ret void
 }
+
+define void @splat_v64i1(<64 x i1>* %x, i1 %y) {
+; LMULMAX2-LABEL: splat_v64i1:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.x v26, a1
+; LMULMAX2-NEXT:    vmsne.vi v25, v26, 0
+; LMULMAX2-NEXT:    addi a1, a0, 4
+; LMULMAX2-NEXT:    vse1.v v25, (a1)
+; LMULMAX2-NEXT:    vse1.v v25, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: splat_v64i1:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-RV32-NEXT:    vmsne.vi v26, v25, 0
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 6
+; LMULMAX1-RV32-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 4
+; LMULMAX1-RV32-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 2
+; LMULMAX1-RV32-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV32-NEXT:    vse1.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: splat_v64i1:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-RV64-NEXT:    vmsne.vi v26, v25, 0
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 6
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 4
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 2
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+  %a = insertelement <64 x i1> undef, i1 %y, i32 0
+
  %b = shufflevector <64 x i1> %a, <64 x i1> undef, <64 x i32> zeroinitializer
+  store <64 x i1> %b, <64 x i1>* %x
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
@@ -24,6 +24,18 @@
   ret <vscale x 1 x i1> %splat
 }
 
+define <vscale x 1 x i1> @vsplat_nxv1i1_2(i1 %x) {
+; CHECK-LABEL: vsplat_nxv1i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> undef, i1 %x, i32 0
+  %splat = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+  ret <vscale x 1 x i1> %splat
+}
+
 define <vscale x 2 x i1> @vsplat_nxv2i1_0() {
 ; CHECK-LABEL: vsplat_nxv2i1_0:
 ; CHECK:       # %bb.0:
@@ -46,6 +58,18 @@
   ret <vscale x 2 x i1> %splat
 }
 
+define <vscale x 2 x i1> @vsplat_nxv2i1_2(i1 %x) {
+; CHECK-LABEL: vsplat_nxv2i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> undef, i1 %x, i32 0
+  %splat = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x i1> %splat
+}
+
 define <vscale x 4 x i1> @vsplat_nxv4i1_0() {
 ; CHECK-LABEL: vsplat_nxv4i1_0:
 ; CHECK:       # %bb.0:
@@ -68,6 +92,18 @@
   ret <vscale x 4 x i1> %splat
 }
 
+define <vscale x 4 x i1> @vsplat_nxv4i1_2(i1 %x) {
+; CHECK-LABEL: vsplat_nxv4i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> undef, i1 %x, i32 0
+  %splat = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i1> %splat
+}
+
 define <vscale x 8 x i1> @vsplat_nxv8i1_0() {
 ; CHECK-LABEL: vsplat_nxv8i1_0:
 ; CHECK:       # %bb.0:
@@ -90,6 +126,18 @@
   ret <vscale x 8 x i1> %splat
 }
 
+define <vscale x 8 x i1> @vsplat_nxv8i1_2(i1 %x) {
+; CHECK-LABEL: vsplat_nxv8i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> undef, i1 %x, i32 0
+  %splat = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32>
zeroinitializer
+  ret <vscale x 8 x i1> %splat
+}
+
 define <vscale x 16 x i1> @vsplat_nxv16i1_0() {
 ; CHECK-LABEL: vsplat_nxv16i1_0:
 ; CHECK:       # %bb.0:
@@ -111,3 +159,15 @@
   %splat = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
   ret <vscale x 16 x i1> %splat
 }
+
+define <vscale x 16 x i1> @vsplat_nxv16i1_2(i1 %x) {
+; CHECK-LABEL: vsplat_nxv16i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> undef, i1 %x, i32 0
+  %splat = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+  ret <vscale x 16 x i1> %splat
+}