diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -133,6 +133,7 @@
   /// Implement vselect in terms of XOR, AND, OR when blend is not
   /// supported by the target.
   SDValue ExpandVSELECT(SDNode *Node);
+  SDValue ExpandVP_SELECT(SDNode *Node);
   SDValue ExpandSELECT(SDNode *Node);
   std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
   SDValue ExpandStore(SDNode *N);
@@ -349,6 +350,7 @@
   case ISD::CTPOP:
   case ISD::SELECT:
   case ISD::VSELECT:
+  case ISD::VP_SELECT:
   case ISD::SELECT_CC:
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND:
@@ -718,6 +720,9 @@
   case ISD::VSELECT:
     Results.push_back(ExpandVSELECT(Node));
     return;
+  case ISD::VP_SELECT:
+    Results.push_back(ExpandVP_SELECT(Node));
+    return;
   case ISD::SELECT:
     Results.push_back(ExpandSELECT(Node));
     return;
@@ -1195,6 +1200,46 @@
   return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
 }
 
+/// Expand VP_SELECT into VP_XOR, VP_AND and VP_OR nodes for targets that do
+/// not support it natively.
+///
+/// \param Node the VP_SELECT node; its operands are read as (condition mask,
+///        value for set lanes, value for clear lanes, explicit vector length).
+/// \returns the VP_OR combining both masked inputs, or the result of
+///          DAG.UnrollVectorOp(Node) when that expansion cannot be used.
+SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
+  SDLoc DL(Node);
+
+  SDValue Mask = Node->getOperand(0);
+  SDValue Op1 = Node->getOperand(1);
+  SDValue Op2 = Node->getOperand(2);
+  SDValue EVL = Node->getOperand(3);
+
+  EVT VT = Mask.getValueType();
+
+  // If we can't even use the basic vector operations of
+  // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
+  if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Node);
+
+  // This operation also isn't safe when the operands aren't also booleans.
+  if (Op1.getValueType().getVectorElementType() != MVT::i1)
+    return DAG.UnrollVectorOp(Node);
+
+  // The helper operations must produce every lane below EVL, including the
+  // lanes where the condition is false (those take their value from Op2).
+  // Predicating them on the condition itself would leave exactly those lanes
+  // undefined, so predicate them on an all-ones mask instead.
+  SDValue Ones = DAG.getAllOnesConstant(DL, VT);
+  SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);
+
+  Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
+  Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
+  return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
+}
+
 void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
                                        SmallVectorImpl<SDValue> &Results) {
   // Attempt to expand using TargetLowering.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -546,6 +546,7 @@
       setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction(ISD::VSELECT, VT, Expand);
+      setOperationAction(ISD::VP_SELECT, VT, Expand);
 
       setOperationAction(ISD::VP_AND, VT, Custom);
       setOperationAction(ISD::VP_OR, VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
@@ -4,6 +4,76 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
+declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
+
+define <1 x i1> @select_v1i1(<1 x i1> %a, <1 x i1> %b, <1 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <1 x i1> @llvm.vp.select.v1i1(<1 x i1> %a, <1 x i1> %b, <1 x i1> %c, i32 %evl)
+  ret <1 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.select.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32)
+
+define <2 x i1> @select_v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.select.v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %c, i32 %evl)
+  ret <2 x i1> %v
+}
+
+declare <4 x i1> @llvm.vp.select.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
+
+define <4 x i1> @select_v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.select.v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c, i32 %evl)
+  ret <4 x i1> %v
+}
+
+declare <8 x i1> @llvm.vp.select.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
+
+define <8 x i1> @select_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <8 x i1> @llvm.vp.select.v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, i32 %evl)
+  ret <8 x i1> %v
+}
+
+declare <16 x i1> @llvm.vp.select.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32)
+
+define <16 x i1> @select_v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <16 x i1> @llvm.vp.select.v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %c, i32 %evl)
+  ret <16 x i1> %v
+}
+
 declare <2 x i8> @llvm.vp.select.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
 
 define <2 x i8> @select_v2i8(<2 x i1> %a, <2 x i8> %b, <2 x i8> %c, i32 zeroext %evl) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -4,6 +4,104 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
+declare <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i1> @select_nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv1i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i1> %c, i32 %evl)
+  ret <vscale x 1 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @select_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv2i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c, i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 4 x i1> @llvm.vp.select.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i1> @select_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, <vscale x 4 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv4i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i1> @llvm.vp.select.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, <vscale x 4 x i1> %c, i32 %evl)
+  ret <vscale x 4 x i1> %v
+}
+
+declare <vscale x 8 x i1> @llvm.vp.select.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i1> @select_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, <vscale x 8 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv8i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i1> @llvm.vp.select.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, <vscale x 8 x i1> %c, i32 %evl)
+  ret <vscale x 8 x i1> %v
+}
+
+declare <vscale x 16 x i1> @llvm.vp.select.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i1> @select_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv16i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i1> @llvm.vp.select.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c, i32 %evl)
+  ret <vscale x 16 x i1> %v
+}
+
+declare <vscale x 32 x i1> @llvm.vp.select.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i1> @select_nxv32i1(<vscale x 32 x i1> %a, <vscale x 32 x i1> %b, <vscale x 32 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv32i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i1> @llvm.vp.select.nxv32i1(<vscale x 32 x i1> %a, <vscale x 32 x i1> %b, <vscale x 32 x i1> %c, i32 %evl)
+  ret <vscale x 32 x i1> %v
+}
+
+declare <vscale x 64 x i1> @llvm.vp.select.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i1> @select_nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b, <vscale x 64 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv64i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT:    vmandn.mm v9, v9, v0
+; CHECK-NEXT:    vmand.mm v8, v8, v0
+; CHECK-NEXT:    vmor.mm v0, v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i1> @llvm.vp.select.nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b, <vscale x 64 x i1> %c, i32 %evl)
+  ret <vscale x 64 x i1> %v
+}
+
 declare <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
 
 define <vscale x 1 x i8> @select_nxv1i8(<vscale x 1 x i1> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, i32 zeroext %evl) {