diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -511,6 +511,7 @@
   SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
   SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                              int64_t ExtTrueVal) const;
   SDValue lowerVectorMaskTrunc(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -428,7 +428,7 @@
     }
 
     for (MVT VT : BoolVecVTs) {
-      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
+      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
 
       // Mask VTs are custom-expanded into a series of standard nodes
       setOperationAction(ISD::TRUNCATE, VT, Custom);
@@ -1388,6 +1388,19 @@
     return Vec;
   }
 
+  // A splat can be lowered as a SETCC. For each fixed-length mask vector
+  // type, we have a legal equivalently-sized i8 type, so we can use that.
+  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+    assert(Splat.getValueType() == XLenVT &&
+           "Unexpected type for i1 splat value");
+    MVT InterVT = VT.changeVectorElementType(MVT::i8);
+    Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
+                        DAG.getConstant(1, DL, XLenVT));
+    Splat = DAG.getSplatBuildVector(InterVT, DL, Splat);
+    SDValue Zero = DAG.getConstant(0, DL, InterVT);
+    return DAG.getSetCC(DL, VT, Splat, Zero, ISD::SETNE);
+  }
+
   return SDValue();
 }
 
@@ -2244,6 +2257,8 @@
   case ISD::BUILD_VECTOR:
     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
   case ISD::SPLAT_VECTOR:
+    if (Op.getValueType().getVectorElementType() == MVT::i1)
+      return lowerVectorMaskSplat(Op, DAG);
     return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
   case ISD::VECTOR_SHUFFLE:
     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
@@ -2802,6 +2817,27 @@
   return DAG.getMergeValues(Parts, DL);
 }
 
+// Lower splats of i1 types to SETCC. For each mask vector type, we have a
+// legal equivalently-sized i8 type, so we can use that as a go-between.
+SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  SDValue SplatVal = Op.getOperand(0);
+  // All-zeros or all-ones splats are handled specially.
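+  // (Returning them unchanged keeps them as legal constant SPLAT_VECTORs,
+  // which instruction selection matches directly, e.g. to vmset.m/vmclr.m.)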
+  if (isa<ConstantSDNode>(SplatVal))
+    return Op;
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
+  assert(SplatVal.getValueType() == XLenVT &&
+         "Unexpected type for i1 splat value");
+  MVT InterVT = VT.changeVectorElementType(MVT::i8);
+  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
+                         DAG.getConstant(1, DL, XLenVT));
+  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
+  SDValue Zero = DAG.getConstant(0, DL, InterVT);
+  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
+}
+
 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
 // illegal (currently only vXi64 RV32).
 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -50,6 +50,54 @@
   ret void
 }
 
+define void @splat_v1i1(<1 x i1>* %x, i1 %y) {
+; CHECK-LABEL: splat_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a1, a1, 1
+; CHECK-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsetivli a1, 1, e8,m1,tu,mu
+; CHECK-NEXT:    vslideup.vi v26, v25, 0
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmsne.vi v25, v26, 0
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <1 x i1> undef, i1 %y, i32 0
+  %b = shufflevector <1 x i1> %a, <1 x i1> undef, <1 x i32> zeroinitializer
+  store <1 x i1> %b, <1 x i1>* %x
+  ret void
+}
+
+define void @splat_v1i1_icmp(<1 x i1>* %x, i32 signext %y, i32 signext %z) {
+; CHECK-LABEL: splat_v1i1_icmp:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xor a1, a1, a2
+; CHECK-NEXT:    seqz a1, a1
+; CHECK-NEXT:    vsetivli a2, 1, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsetivli a1, 1, e8,m1,tu,mu
+; CHECK-NEXT:    vslideup.vi v26, v25, 0
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmsne.vi v25, v26, 0
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %c = icmp eq i32 %y, %z
+  %a = insertelement <1 x i1> undef, i1 %c, i32 0
+  %b = shufflevector <1 x i1> %a, <1 x i1> undef, <1 x i32> zeroinitializer
+  store <1 x i1> %b, <1 x i1>* %x
+  ret void
+}
+
 define void @splat_ones_v4i1(<4 x i1>* %x) {
 ; CHECK-LABEL: splat_ones_v4i1:
 ; CHECK:       # %bb.0:
@@ -61,6 +109,29 @@
   ret void
 }
 
+define void @splat_v4i1(<4 x i1>* %x, i1 %y) {
+; CHECK-LABEL: splat_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a1, a1, 1
+; CHECK-NEXT:    vsetivli a2, 4, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vsetivli a1, 4, e8,m1,tu,mu
+; CHECK-NEXT:    vslideup.vi v26, v25, 0
+; CHECK-NEXT:    vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmsne.vi v25, v26, 0
+; CHECK-NEXT:    vse1.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i1> undef, i1 %y, i32 0
+  %b = shufflevector <4 x i1> %a, <4 x i1> undef, <4 x i32> zeroinitializer
+  store <4 x i1> %b, <4 x i1>* %x
+  ret void
+}
+
 define void @splat_zeros_v8i1(<8 x i1>* %x) {
 ; CHECK-LABEL: splat_zeros_v8i1:
 ; CHECK:       # %bb.0:
@@ -72,6 +143,21 @@
   ret void
 }
 
+define void @splat_v8i1(<8 x i1>* %x, i1 %y) {
+; CHECK-LABEL: splat_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a1, a1, 1
+; CHECK-NEXT:    vsetivli a2, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmsne.vi v26, v25, 0
+; CHECK-NEXT:    vse1.v v26, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i1> undef, i1 %y, i32 0
+  %b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> zeroinitializer
+  store <8 x i1> %b, <8 x i1>* %x
+  ret void
+}
+
 define void @splat_ones_v16i1(<16 x i1>* %x) {
 ; CHECK-LABEL: splat_ones_v16i1:
 ; CHECK:       # %bb.0:
@@ -83,6 +169,21 @@
   ret void
 }
 
+define void @splat_v16i1(<16 x i1>* %x, i1 %y) {
+; CHECK-LABEL: splat_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a1, a1, 1
+; CHECK-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a1
+; CHECK-NEXT:    vmsne.vi v26, v25, 0
+; CHECK-NEXT:    vse1.v v26, (a0)
+; CHECK-NEXT:    ret
+  %a = insertelement <16 x i1> undef, i1 %y, i32 0
+  %b = shufflevector <16 x i1> %a, <16 x i1> undef, <16 x i32> zeroinitializer
+  store <16 x i1> %b, <16 x i1>* %x
+  ret void
+}
+
 define void @splat_zeros_v32i1(<32 x i1>* %x) {
 ; LMULMAX2-LABEL: splat_zeros_v32i1:
 ; LMULMAX2:       # %bb.0:
@@ -113,6 +214,44 @@
   ret void
 }
 
+define void @splat_v32i1(<32 x i1>* %x, i1 %y) {
+; LMULMAX2-LABEL: splat_v32i1:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    andi a1, a1, 1
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.x v26, a1
+; LMULMAX2-NEXT:    vmsne.vi v25, v26, 0
+; LMULMAX2-NEXT:    vse1.v v25, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: splat_v32i1:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    andi a1, a1, 1
+; LMULMAX1-RV32-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-RV32-NEXT:    vmsne.vi v26, v25, 0
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 2
+; LMULMAX1-RV32-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV32-NEXT:    vse1.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: splat_v32i1:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    andi a1, a1, 1
+; LMULMAX1-RV64-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-RV64-NEXT:    vmsne.vi v26, v25, 0
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 2
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+  %a = insertelement <32 x i1> undef, i1 %y, i32 0
+  %b = shufflevector <32 x i1> %a, <32 x i1> undef, <32 x i32> zeroinitializer
+  store <32 x i1> %b, <32 x i1>* %x
+  ret void
+}
+
 define void @splat_ones_v64i1(<64 x i1>* %x) {
 ; LMULMAX2-LABEL: splat_ones_v64i1:
 ; LMULMAX2:       # %bb.0:
@@ -152,3 +291,51 @@
   store <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <64 x i1>* %x
   ret void
 }
+
+define void @splat_v64i1(<64 x i1>* %x, i1 %y) {
+; LMULMAX2-LABEL: splat_v64i1:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    andi a1, a1, 1
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vmv.v.x v26, a1
+; LMULMAX2-NEXT:    vmsne.vi v25, v26, 0
+; LMULMAX2-NEXT:    addi a1, a0, 4
+; LMULMAX2-NEXT:    vse1.v v25, (a1)
+; LMULMAX2-NEXT:    vse1.v v25, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: splat_v64i1:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    andi a1, a1, 1
+; LMULMAX1-RV32-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-RV32-NEXT:    vmsne.vi v26, v25, 0
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 6
+; LMULMAX1-RV32-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 4
+; LMULMAX1-RV32-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 2
+; LMULMAX1-RV32-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV32-NEXT:    vse1.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: splat_v64i1:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    andi a1, a1, 1
+; LMULMAX1-RV64-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT:    vmv.v.x v25, a1
+; LMULMAX1-RV64-NEXT:    vmsne.vi v26, v25, 0
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 6
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 4
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 2
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a1)
+; LMULMAX1-RV64-NEXT:    vse1.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    ret
+  %a = insertelement <64 x i1> undef, i1 %y, i32 0
+  %b = shufflevector <64 x i1> %a, <64 x i1> undef, <64 x i32> zeroinitializer
+  store <64 x i1> %b, <64 x i1>* %x
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
@@ -24,6 +24,34 @@
   ret <vscale x 1 x i1> %splat
 }
 
+define <vscale x 1 x i1> @vsplat_nxv1i1_2(i1 %x) {
+; CHECK-LABEL: vsplat_nxv1i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> undef, i1 %x, i32 0
+  %splat = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+  ret <vscale x 1 x i1> %splat
+}
+
+define <vscale x 1 x i1> @vsplat_nxv1i1_3(i32 signext %x, i32 signext %y) {
+; CHECK-LABEL: vsplat_nxv1i1_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xor a0, a0, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %c = icmp ne i32 %x, %y
+  %head = insertelement <vscale x 1 x i1> undef, i1 %c, i32 0
+  %splat = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+  ret <vscale x 1 x i1> %splat
+}
+
 define <vscale x 2 x i1> @vsplat_nxv2i1_0() {
 ; CHECK-LABEL: vsplat_nxv2i1_0:
 ; CHECK:       # %bb.0:
@@ -46,6 +74,19 @@
   ret <vscale x 2 x i1> %splat
 }
 
+define <vscale x 2 x i1> @vsplat_nxv2i1_2(i1 %x) {
+; CHECK-LABEL: vsplat_nxv2i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i1> undef, i1 %x, i32 0
+  %splat = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x i1> %splat
+}
+
 define <vscale x 4 x i1> @vsplat_nxv4i1_0() {
 ; CHECK-LABEL: vsplat_nxv4i1_0:
 ; CHECK:       # %bb.0:
@@ -68,6 +109,19 @@
   ret <vscale x 4 x i1> %splat
 }
 
+define <vscale x 4 x i1> @vsplat_nxv4i1_2(i1 %x) {
+; CHECK-LABEL: vsplat_nxv4i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i1> undef, i1 %x, i32 0
+  %splat = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+  ret <vscale x 4 x i1> %splat
+}
+
 define <vscale x 8 x i1> @vsplat_nxv8i1_0() {
 ; CHECK-LABEL: vsplat_nxv8i1_0:
 ; CHECK:       # %bb.0:
@@ -90,6 +144,19 @@
   ret <vscale x 8 x i1> %splat
 }
 
+define <vscale x 8 x i1> @vsplat_nxv8i1_2(i1 %x) {
+; CHECK-LABEL: vsplat_nxv8i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.x v25, a0
+; CHECK-NEXT:    vmsne.vi v0, v25, 0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> undef, i1 %x, i32 0
+  %splat = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x i1> %splat
+}
+
 define <vscale x 16 x i1> @vsplat_nxv16i1_0() {
 ; CHECK-LABEL: vsplat_nxv16i1_0:
 ; CHECK:       # %bb.0:
@@ -111,3 +178,16 @@
   %splat = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
   ret <vscale x 16 x i1> %splat
 }
+
+define <vscale x 16 x i1> @vsplat_nxv16i1_2(i1 %x) {
+; CHECK-LABEL: vsplat_nxv16i1_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
+; CHECK-NEXT:    vmv.v.x v26, a0
+; CHECK-NEXT:    vmsne.vi v0, v26, 0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 16 x i1> undef, i1 %x, i32 0
+  %splat = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+  ret <vscale x 16 x i1> %splat
+}
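
For reference, the pattern these tests pin down (drawn from the splat_v8i1 case
above, so nothing here is new output): a variable i1 splat is materialized
through the legal equivalently-sized i8 vector type and then compared against
zero.

    ; IR input
    %a = insertelement <8 x i1> undef, i1 %y, i32 0
    %b = shufflevector <8 x i1> %a, <8 x i1> undef, <8 x i32> zeroinitializer

    # Selected RVV code
    andi a1, a1, 1                 # keep only bit 0 of the scalar
    vsetivli a2, 8, e8,m1,ta,mu    # 8 x e8 elements
    vmv.v.x v25, a1                # splat the 0/1 value across an i8 vector
    vmsne.vi v26, v25, 0           # compare with zero to produce the i1 mask
    vse1.v v26, (a0)               # store the mask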