diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -226,6 +226,9 @@
   VSEXT_VL,
   VZEXT_VL,
 
+  // vpopc.m with additional mask and VL operands.
+  VPOPC_VL,
+
   // Memory opcodes start here.
   VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE,
   VSE_VL,
@@ -490,6 +493,7 @@
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVectorMaskVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -441,6 +441,10 @@
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
 
+      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+
       // Expand all extending loads to types larger than this, and truncating
       // stores from types larger than this.
       for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
@@ -621,6 +625,10 @@
         setOperationAction(ISD::BITCAST, VT, Custom);
 
+        setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+        setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+        setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+
         // Operations below are different for between masks and other vectors.
         if (VT.getVectorElementType() == MVT::i1) {
           setOperationAction(ISD::AND, VT, Custom);
@@ -673,9 +681,6 @@
         // Custom-lower reduction operations to set up the corresponding custom
         // nodes' operands.
         setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
-        setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
         setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
         setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
         setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
@@ -1893,9 +1898,12 @@
   case ISD::VECREDUCE_SMAX:
   case ISD::VECREDUCE_UMIN:
   case ISD::VECREDUCE_SMIN:
+    return lowerVECREDUCE(Op, DAG);
   case ISD::VECREDUCE_AND:
   case ISD::VECREDUCE_OR:
   case ISD::VECREDUCE_XOR:
+    if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+      return lowerVectorMaskVECREDUCE(Op, DAG);
     return lowerVECREDUCE(Op, DAG);
   case ISD::VECREDUCE_FADD:
   case ISD::VECREDUCE_SEQ_FADD:
@@ -2993,6 +3001,60 @@
   }
 }
 
+SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Vec = Op.getOperand(0);
+  MVT VecVT = Vec.getSimpleValueType();
+  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
+          Op.getOpcode() == ISD::VECREDUCE_OR ||
+          Op.getOpcode() == ISD::VECREDUCE_XOR) &&
+         "Unexpected reduction lowering");
+
+  MVT XLenVT = Subtarget.getXLenVT();
+  assert(Op.getValueType() == XLenVT &&
+         "Expected reduction output to be legalized to XLenVT");
+
+  MVT ContainerVT = VecVT;
+  if (VecVT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(VecVT);
+    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+  }
+
+  SDValue Mask, VL;
+  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
+
+  switch (Op.getOpcode()) {
+  default:
+    llvm_unreachable("Unhandled reduction");
+  case ISD::VECREDUCE_AND:
+    // vpopc !x == 0
+    Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, Mask, VL);
+    Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
+    return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETEQ);
+  case ISD::VECREDUCE_OR:
+    // vpopc x != 0
+    Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
+    return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
+  case ISD::VECREDUCE_XOR: {
+    // vpopc != 0 && vpopc != vl
+    Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
+    // Compute the actual vector length to use in the comparisons; for
+    // scalable vector types the VL operand is currently set to 'X0' rather
+    // than the real length. Note that it would be more efficient if we could
+    // use the vector length produced by the implicit vsetvli before the
+    // VPOPC_VL.
+    if (!VecVT.isFixedLengthVector())
+      VL = DAG.getNode(
+          ISD::VSCALE, DL, XLenVT,
+          DAG.getConstant(VecVT.getVectorMinNumElements(), DL, XLenVT));
+    SDValue LHS = DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
+    SDValue RHS = DAG.getSetCC(DL, XLenVT, Vec, VL, ISD::SETNE);
+    return DAG.getNode(ISD::AND, DL, XLenVT, LHS, RHS);
+  }
+  }
+}
+
 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
                                             SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -7037,6 +7099,7 @@
   NODE_NAME_CASE(VRGATHEREI16_VV_VL)
   NODE_NAME_CASE(VSEXT_VL)
   NODE_NAME_CASE(VZEXT_VL)
+  NODE_NAME_CASE(VPOPC_VL)
   NODE_NAME_CASE(VLE_VL)
   NODE_NAME_CASE(VSE_VL)
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -196,6 +196,13 @@
 def riscv_vmnot_vl : PatFrag<(ops node:$rs, node:$vl),
                              (riscv_vmxor_vl node:$rs, true_mask, node:$vl)>;
 
+def riscv_vpopc_vl : SDNode<"RISCVISD::VPOPC_VL",
+                            SDTypeProfile<1, 3, [SDTCisVT<0, XLenVT>,
+                                                 SDTCisVec<1>, SDTCisInt<1>,
+                                                 SDTCVecEltisVT<2, i1>,
+                                                 SDTCisSameNumEltsAs<1, 2>,
+                                                 SDTCisVT<3, XLenVT>]>>;
+
 def SDT_RISCVVEXTEND_VL : SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameNumEltsAs<0, 1>,
                                                SDTCisSameNumEltsAs<1, 2>,
@@ -1028,6 +1035,11 @@
   def : Pat<(mti.Mask (riscv_vmnot_vl VR:$rs, (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
                  VR:$rs, VR:$rs, GPR:$vl, mti.SEW)>;
+
+  def : Pat<(XLenVT (riscv_vpopc_vl (mti.Mask VR:$rs2),
+                                    (mti.Mask true_mask),
+                                    (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVPOPC_M_" # mti.BX) VR:$rs2, GPR:$vl, mti.SEW)>;
 }
 
 } // Predicates = [HasStdExtV]
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
@@ -0,0 +1,385 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+
+declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>)
+
+define signext i1 @vreduce_or_v1i1(<1 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1>)
+
+define signext i1 @vreduce_xor_v1i1(<1 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>)
+
+define signext i1 @vreduce_and_v1i1(<1 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT:    vmv.x.s a0, v25
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
+
+define signext i1 @vreduce_or_v2i1(<2 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
+
+define signext i1 @vreduce_xor_v2i1(<2 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    andi a0, a0, -3
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
+
+define signext i1 @vreduce_and_v2i1(<2 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT:    vmnand.mm v25, v0, v0
+; CHECK-NEXT:    vpopc.m a0, v25
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
+
+define signext i1 @vreduce_or_v4i1(<4 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>)
+
+define signext i1 @vreduce_xor_v4i1(<4 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    andi a0, a0, -5
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
+
+define signext i1 @vreduce_and_v4i1(<4 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT:    vmnand.mm v25, v0, v0
+; CHECK-NEXT:    vpopc.m a0, v25
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1>)
+
+define signext i1 @vreduce_or_v8i1(<8 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>)
+
+define signext i1 @vreduce_xor_v8i1(<8 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    andi a0, a0, -9
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
+
+define signext i1 @vreduce_and_v8i1(<8 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT:    vmnand.mm v25, v0, v0
+; CHECK-NEXT:    vpopc.m a0, v25
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1>)
+
+define signext i1 @vreduce_or_v16i1(<16 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 16, e8,m1,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>)
+
+define signext i1 @vreduce_xor_v16i1(<16 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 16, e8,m1,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    andi a0, a0, -17
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>)
+
+define signext i1 @vreduce_and_v16i1(<16 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 16, e8,m1,ta,mu
+; CHECK-NEXT:    vmnand.mm v25, v0, v0
+; CHECK-NEXT:    vpopc.m a0, v25
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>)
+
+define signext i1 @vreduce_or_v32i1(<32 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_or_v32i1:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmor.mm v25, v0, v8
+; LMULMAX1-NEXT:    vpopc.m a0, v25
+; LMULMAX1-NEXT:    snez a0, a0
+; LMULMAX1-NEXT:    neg a0, a0
+; LMULMAX1-NEXT:    ret
+;
+; LMULMAX8-LABEL: vreduce_or_v32i1:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    addi a0, zero, 32
+; LMULMAX8-NEXT:    vsetvli a0, a0, e8,m2,ta,mu
+; LMULMAX8-NEXT:    vpopc.m a0, v0
+; LMULMAX8-NEXT:    snez a0, a0
+; LMULMAX8-NEXT:    neg a0, a0
+; LMULMAX8-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>)
+
+define signext i1 @vreduce_xor_v32i1(<32 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_xor_v32i1:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmxor.mm v25, v0, v8
+; LMULMAX1-NEXT:    vpopc.m a0, v25
+; LMULMAX1-NEXT:    andi a0, a0, -17
+; LMULMAX1-NEXT:    snez a0, a0
+; LMULMAX1-NEXT:    neg a0, a0
+; LMULMAX1-NEXT:    ret
+;
+; LMULMAX8-LABEL: vreduce_xor_v32i1:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    addi a0, zero, 32
+; LMULMAX8-NEXT:    vsetvli a0, a0, e8,m2,ta,mu
+; LMULMAX8-NEXT:    vpopc.m a0, v0
+; LMULMAX8-NEXT:    andi a0, a0, -33
+; LMULMAX8-NEXT:    snez a0, a0
+; LMULMAX8-NEXT:    neg a0, a0
+; LMULMAX8-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>)
+
+define signext i1 @vreduce_and_v32i1(<32 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_and_v32i1:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmnand.mm v25, v0, v8
+; LMULMAX1-NEXT:    vpopc.m a0, v25
+; LMULMAX1-NEXT:    seqz a0, a0
+; LMULMAX1-NEXT:    neg a0, a0
+; LMULMAX1-NEXT:    ret
+;
+; LMULMAX8-LABEL: vreduce_and_v32i1:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    addi a0, zero, 32
+; LMULMAX8-NEXT:    vsetvli a0, a0, e8,m2,ta,mu
+; LMULMAX8-NEXT:    vmnand.mm v25, v0, v0
+; LMULMAX8-NEXT:    vpopc.m a0, v25
+; LMULMAX8-NEXT:    seqz a0, a0
+; LMULMAX8-NEXT:    neg a0, a0
+; LMULMAX8-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v64i1(<64 x i1>)
+
+define signext i1 @vreduce_or_v64i1(<64 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_or_v64i1:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmor.mm v25, v8, v10
+; LMULMAX1-NEXT:    vmor.mm v26, v0, v9
+; LMULMAX1-NEXT:    vmor.mm v25, v26, v25
+; LMULMAX1-NEXT:    vpopc.m a0, v25
+; LMULMAX1-NEXT:    snez a0, a0
+; LMULMAX1-NEXT:    neg a0, a0
+; LMULMAX1-NEXT:    ret
+;
+; LMULMAX8-LABEL: vreduce_or_v64i1:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    addi a0, zero, 64
+; LMULMAX8-NEXT:    vsetvli a0, a0, e8,m4,ta,mu
+; LMULMAX8-NEXT:    vpopc.m a0, v0
+; LMULMAX8-NEXT:    snez a0, a0
+; LMULMAX8-NEXT:    neg a0, a0
+; LMULMAX8-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>)
+
+define signext i1 @vreduce_xor_v64i1(<64 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_xor_v64i1:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmxor.mm v25, v8, v10
+; LMULMAX1-NEXT:    vmxor.mm v26, v0, v9
+; LMULMAX1-NEXT:    vmxor.mm v25, v26, v25
+; LMULMAX1-NEXT:    vpopc.m a0, v25
+; LMULMAX1-NEXT:    andi a0, a0, -17
+; LMULMAX1-NEXT:    snez a0, a0
+; LMULMAX1-NEXT:    neg a0, a0
+; LMULMAX1-NEXT:    ret
+;
+; LMULMAX8-LABEL: vreduce_xor_v64i1:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    addi a0, zero, 64
+; LMULMAX8-NEXT:    vsetvli a0, a0, e8,m4,ta,mu
+; LMULMAX8-NEXT:    vpopc.m a0, v0
+; LMULMAX8-NEXT:    andi a0, a0, -65
+; LMULMAX8-NEXT:    snez a0, a0
+; LMULMAX8-NEXT:    neg a0, a0
+; LMULMAX8-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>)
+
+define signext i1 @vreduce_and_v64i1(<64 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_and_v64i1:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vmand.mm v25, v8, v10
+; LMULMAX1-NEXT:    vmand.mm v26, v0, v9
+; LMULMAX1-NEXT:    vmnand.mm v25, v26, v25
+; LMULMAX1-NEXT:    vpopc.m a0, v25
+; LMULMAX1-NEXT:    seqz a0, a0
+; LMULMAX1-NEXT:    neg a0, a0
+; LMULMAX1-NEXT:    ret
+;
+; LMULMAX8-LABEL: vreduce_and_v64i1:
+; LMULMAX8:       # %bb.0:
+; LMULMAX8-NEXT:    addi a0, zero, 64
+; LMULMAX8-NEXT:    vsetvli a0, a0, e8,m4,ta,mu
+; LMULMAX8-NEXT:    vmnand.mm v25, v0, v0
+; LMULMAX8-NEXT:    vpopc.m a0, v25
+; LMULMAX8-NEXT:    seqz a0, a0
+; LMULMAX8-NEXT:    neg a0, a0
+; LMULMAX8-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v)
+  ret i1 %red
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
@@ -0,0 +1,338 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+declare i1 @llvm.vector.reduce.or.nxv1i1(<vscale x 1 x i1>)
+
+define signext i1 @vreduce_or_nxv1i1(<vscale x 1 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.nxv1i1(<vscale x 1 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv1i1(<vscale x 1 x i1>)
+
+define signext i1 @vreduce_xor_nxv1i1(<vscale x 1 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vpopc.m a1, v0
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    snez a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.nxv1i1(<vscale x 1 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1>)
+
+define signext i1 @vreduce_and_nxv1i1(<vscale x 1 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vmnand.mm v25, v0, v0
+; CHECK-NEXT:    vpopc.m a0, v25
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1>)
+
+define signext i1 @vreduce_or_nxv2i1(<vscale x 2 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv2i1(<vscale x 2 x i1>)
+
+define signext i1 @vreduce_xor_nxv2i1(<vscale x 2 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vpopc.m a1, v0
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    snez a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.nxv2i1(<vscale x 2 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1>)
+
+define signext i1 @vreduce_and_nxv2i1(<vscale x 2 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT:    vmnand.mm v25, v0, v0
+; CHECK-NEXT:    vpopc.m a0, v25
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1>)
+
+define signext i1 @vreduce_or_nxv4i1(<vscale x 4 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv4i1(<vscale x 4 x i1>)
+
+define signext i1 @vreduce_xor_nxv4i1(<vscale x 4 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vpopc.m a1, v0
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    snez a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.nxv4i1(<vscale x 4 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1>)
+
+define signext i1 @vreduce_and_nxv4i1(<vscale x 4 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vmnand.mm v25, v0, v0
+; CHECK-NEXT:    vpopc.m a0, v25
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv8i1(<vscale x 8 x i1>)
+
+define signext i1 @vreduce_or_nxv8i1(<vscale x 8 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.nxv8i1(<vscale x 8 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv8i1(<vscale x 8 x i1>)
+
+define signext i1 @vreduce_xor_nxv8i1(<vscale x 8 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    xor a1, a0, a1
+; CHECK-NEXT:    snez a1, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.nxv8i1(<vscale x 8 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1>)
+
+define signext i1 @vreduce_and_nxv8i1(<vscale x 8 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vmnand.mm v25, v0, v0
+; CHECK-NEXT:    vpopc.m a0, v25
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1>)
+
+define signext i1 @vreduce_or_nxv16i1(<vscale x 16 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m2,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv16i1(<vscale x 16 x i1>)
+
+define signext i1 @vreduce_xor_nxv16i1(<vscale x 16 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
+; CHECK-NEXT:    vpopc.m a1, v0
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    snez a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.nxv16i1(<vscale x 16 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1>)
+
+define signext i1 @vreduce_and_nxv16i1(<vscale x 16 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m2,ta,mu
+; CHECK-NEXT:    vmnand.mm v25, v0, v0
+; CHECK-NEXT:    vpopc.m a0, v25
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1>)
+
+define signext i1 @vreduce_or_nxv32i1(<vscale x 32 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m4,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1>)
+
+define signext i1 @vreduce_xor_nxv32i1(<vscale x 32 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    vsetvli a1, zero, e8,m4,ta,mu
+; CHECK-NEXT:    vpopc.m a1, v0
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    snez a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1>)
+
+define signext i1 @vreduce_and_nxv32i1(<vscale x 32 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m4,ta,mu
+; CHECK-NEXT:    vmnand.mm v25, v0, v0
+; CHECK-NEXT:    vpopc.m a0, v25
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv64i1(<vscale x 64 x i1>)
+
+define signext i1 @vreduce_or_nxv64i1(<vscale x 64 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m8,ta,mu
+; CHECK-NEXT:    vpopc.m a0, v0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.or.nxv64i1(<vscale x 64 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv64i1(<vscale x 64 x i1>)
+
+define signext i1 @vreduce_xor_nxv64i1(<vscale x 64 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
+; CHECK-NEXT:    vpopc.m a1, v0
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    snez a1, a1
+; CHECK-NEXT:    and a0, a1, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.xor.nxv64i1(<vscale x 64 x i1> %v)
+  ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1>)
+
+define signext i1 @vreduce_and_nxv64i1(<vscale x 64 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8,m8,ta,mu
+; CHECK-NEXT:    vmnand.mm v25, v0, v0
+; CHECK-NEXT:    vpopc.m a0, v25
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    neg a0, a0
+; CHECK-NEXT:    ret
+  %red = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %v)
+  ret i1 %red
+}