diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -983,6 +983,7 @@
     SplitVecRes_Gather(cast<VPGatherSDNode>(N), Lo, Hi, /*SplitSETCC*/ true);
     break;
   case ISD::SETCC:
+  case ISD::VP_SETCC:
     SplitVecRes_SETCC(N, Lo, Hi);
     break;
   case ISD::VECTOR_REVERSE:
@@ -2108,8 +2109,24 @@
   else
     std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
 
-  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
-  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+  if (N->getOpcode() != ISD::VP_SETCC) {
+    Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+    Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+    return;
+  }
+
+  // Only VP_SETCC reaches this point; split its mask and EVL operands too.
+  assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
+
+  SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+  std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
+  std::tie(EVLLo, EVLHi) =
+      DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
+
+  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2), MaskLo,
+                   EVLLo);
+  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2), MaskHi,
+                   EVLHi);
 }
 
 void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
@@ -2644,6 +2661,7 @@
     report_fatal_error("Do not know how to split this operator's "
                        "operand!\n");
 
+  case ISD::VP_SETCC:
   case ISD::SETCC:         Res = SplitVecOp_VSETCC(N); break;
   case ISD::BITCAST:       Res = SplitVecOp_BITCAST(N); break;
   case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
@@ -3478,8 +3496,21 @@
   EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt);
   EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2);
 
-  LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
-  HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+  // For VP_SETCC the mask and EVL operands must be split as well.
+  if (N->getOpcode() == ISD::SETCC) {
+    LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
+    HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
+  } else {
+    assert(N->getOpcode() == ISD::VP_SETCC && "Expected SETCC or VP_SETCC");
+    SDValue MaskLo, MaskHi, EVLLo, EVLHi;
+    std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
+    std::tie(EVLLo, EVLHi) =
+        DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
+    LoRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Lo0, Lo1,
+                        N->getOperand(2), MaskLo, EVLLo);
+    HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1,
+                        N->getOperand(2), MaskHi, EVLHi);
+  }
 
   SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
   EVT OpVT = N->getOperand(0).getValueType();
@@ -3585,6 +3616,7 @@
     Res = WidenVecRes_Select(N);
     break;
   case ISD::SELECT_CC:         Res = WidenVecRes_SELECT_CC(N); break;
+  case ISD::VP_SETCC:
   case ISD::SETCC:             Res = WidenVecRes_SETCC(N); break;
   case ISD::UNDEF:             Res = WidenVecRes_UNDEF(N); break;
   case ISD::VECTOR_SHUFFLE:
@@ -5248,8 +5280,17 @@
          InOp2.getValueType() == WidenInVT &&
          "Input not widened to expected type!");
   (void)WidenInVT;
-  return DAG.getNode(ISD::SETCC, SDLoc(N),
-                     WidenVT, InOp1, InOp2, N->getOperand(2));
+  if (N->getNumOperands() == 3)
+    return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
+                       N->getOperand(2));
+
+  assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
+  assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
+
+  SDValue Mask =
+      GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount());
+  return DAG.getNode(ISD::VP_SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
+                     N->getOperand(2), Mask, N->getOperand(4));
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
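
For context (a sketch, not part of the patch): splitting a VP_SETCC in half
also splits its explicit vector length (EVL) via DAG.SplitEVL. The low half
covers at most half the lanes and the high half gets the remainder, never
going below zero, which is the min/saturating-subtract shape the bltu/addi
sequences in the tests below check for. A minimal standalone C++ sketch of
that arithmetic; splitEVL and HalfNumElts are illustrative names, not LLVM
API:

  #include <algorithm>
  #include <cstdint>
  #include <cstdio>
  #include <utility>

  // Split an explicit vector length across the two halves of a split VP
  // node: the low half handles min(EVL, HalfNumElts) lanes, the high half
  // whatever remains (saturating at zero).
  static std::pair<uint64_t, uint64_t> splitEVL(uint64_t EVL,
                                                uint64_t HalfNumElts) {
    uint64_t EVLLo = std::min(EVL, HalfNumElts);
    uint64_t EVLHi = EVL > HalfNumElts ? EVL - HalfNumElts : 0;
    return {EVLLo, EVLHi};
  }

  int main() {
    // A <256 x i8> vp.icmp with EVL = 200 splits into two <128 x i8>
    // halves: the low half compares 128 lanes, the high half 72.
    auto [EVLLo, EVLHi] = splitEVL(200, 128);
    std::printf("EVLLo=%llu EVLHi=%llu\n",
                (unsigned long long)EVLLo, (unsigned long long)EVLHi);
    return 0;
  }
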
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; FIXME: We're missing canonicalizations of ISD::VP_SETCC equivalent to those
@@ -57,6 +57,42 @@
   ret <8 x i1> %v
 }
 
+declare <5 x i1> @llvm.vp.icmp.v5i8(<5 x i8>, <5 x i8>, metadata, <5 x i1>, i32)
+
+define <5 x i1> @icmp_eq_vv_v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vv_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vmseq.vv v0, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <5 x i1> @llvm.vp.icmp.v5i8(<5 x i8> %va, <5 x i8> %vb, metadata !"eq", <5 x i1> %m, i32 %evl)
+  ret <5 x i1> %v
+}
+
+define <5 x i1> @icmp_eq_vx_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vx_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i1> @llvm.vp.icmp.v5i8(<5 x i8> %va, <5 x i8> %vb, metadata !"eq", <5 x i1> %m, i32 %evl)
+  ret <5 x i1> %v
+}
+
+define <5 x i1> @icmp_eq_vx_swap_v5i8(<5 x i8> %va, i8 %b, <5 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vx_swap_v5i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
+  %v = call <5 x i1> @llvm.vp.icmp.v5i8(<5 x i8> %vb, <5 x i8> %va, metadata !"eq", <5 x i1> %m, i32 %evl)
+  ret <5 x i1> %v
+}
+
 declare <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8>, <8 x i8>, metadata, <8 x i1>, i32)
 
 define <8 x i1> @icmp_eq_vv_v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
@@ -587,6 +623,138 @@
   ret <8 x i1> %v
 }
 
+declare <256 x i1> @llvm.vp.icmp.v256i8(<256 x i8>, <256 x i8>, metadata, <256 x i1>, i32)
+
+define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vv_v256i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a4, 24
+; CHECK-NEXT:    mul a1, a1, a4
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    li a4, 128
+; CHECK-NEXT:    vsetvli zero, a4, e8, m8, ta, mu
+; CHECK-NEXT:    vle8.v v24, (a0)
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: mv a1, a3 +; CHECK-NEXT: bltu a3, a4, .LBB51_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: .LBB51_2: +; CHECK-NEXT: li a4, 0 +; CHECK-NEXT: vlm.v v24, (a2) +; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: addi a0, a3, -128 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmseq.vv v1, v8, v16, v0.t +; CHECK-NEXT: bltu a3, a0, .LBB51_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a4, a0 +; CHECK-NEXT: .LBB51_4: +; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmseq.vv v8, v16, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <256 x i1> @llvm.vp.icmp.v256i8(<256 x i8> %va, <256 x i8> %vb, metadata !"eq", <256 x i1> %m, i32 %evl) + ret <256 x i1> %v +} + +define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: icmp_eq_vx_v256i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu +; CHECK-NEXT: vlm.v v25, (a1) +; CHECK-NEXT: addi a4, a2, -128 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: bltu a2, a4, .LBB52_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: .LBB52_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t +; CHECK-NEXT: bltu a2, a3, .LBB52_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: .LBB52_4: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i1> @llvm.vp.icmp.v256i8(<256 x i8> %va, <256 x i8> %vb, metadata !"eq", <256 x i1> %m, i32 %evl) + ret <256 x i1> %v +} + +define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: icmp_eq_vx_swap_v256i8: +; CHECK: # %bb.0: +; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, mu +; CHECK-NEXT: vlm.v v25, (a1) +; CHECK-NEXT: addi a4, a2, -128 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: bltu a2, a4, .LBB53_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: .LBB53_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmseq.vx 
v25, v16, a0, v0.t +; CHECK-NEXT: bltu a2, a3, .LBB53_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: .LBB53_4: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: ret + %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer + %v = call <256 x i1> @llvm.vp.icmp.v256i8(<256 x i8> %vb, <256 x i8> %va, metadata !"eq", <256 x i1> %m, i32 %evl) + ret <256 x i1> %v +} + declare <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32>, <8 x i32>, metadata, <8 x i1>, i32) define <8 x i1> @icmp_eq_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) { @@ -1162,6 +1330,120 @@ ret <8 x i1> %v } +declare <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32>, <64 x i32>, metadata, <64 x i1>, i32) + +define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: icmp_eq_vv_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: addi a4, a0, 128 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v24, (a4) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu +; CHECK-NEXT: addi a4, a2, -32 +; CHECK-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-NEXT: bltu a2, a4, .LBB99_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a3, a4 +; CHECK-NEXT: .LBB99_2: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmseq.vv v2, v16, v24, v0.t +; CHECK-NEXT: bltu a2, a1, .LBB99_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: .LBB99_4: +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmseq.vv v16, v24, v8, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v16, v2, 4 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32> %va, <64 x i32> %vb, metadata !"eq", <64 x i1> %m, i32 %evl) + ret <64 x i1> %v +} + +define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: icmp_eq_vx_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu +; CHECK-NEXT: addi a3, a1, -32 +; CHECK-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-NEXT: bltu a1, a3, .LBB100_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB100_2: +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t +; CHECK-NEXT: bltu a1, a2, .LBB100_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: .LBB100_4: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v16, v25, 4 +; 
CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <64 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer
+  %v = call <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32> %va, <64 x i32> %vb, metadata !"eq", <64 x i1> %m, i32 %evl)
+  ret <64 x i1> %v
+}
+
+define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vx_swap_v64i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, mu
+; CHECK-NEXT:    addi a3, a1, -32
+; CHECK-NEXT:    vslidedown.vi v0, v0, 4
+; CHECK-NEXT:    bltu a1, a3, .LBB101_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a2, a3
+; CHECK-NEXT:  .LBB101_2:
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    li a2, 32
+; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    bltu a1, a2, .LBB101_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:  .LBB101_4:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vi v16, v25, 4
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <64 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer
+  %v = call <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32> %vb, <64 x i32> %va, metadata !"eq", <64 x i1> %m, i32 %evl)
+  ret <64 x i1> %v
+}
+
 declare <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64>, <8 x i64>, metadata, <8 x i1>, i32)
 
 define <8 x i1> @icmp_eq_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
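
The scalable-vector file below exercises the same splitting path, but the
split point is a runtime quantity rather than the constant 128 used for
<256 x i8>. A sketch of the element-count arithmetic implied by the
vlenb-based sequences in the checks (halfNumElts is an illustrative helper,
not an LLVM or RVV API), assuming RVV's vscale = VLEN/64 and vlenb = VLEN/8:

  #include <cstdint>

  // Half the element count of a scalable type <vscale x MinNumElts x Ty>,
  // computed from vlenb the way the checks below do: for nxv32i32 this is
  // 16 * vscale = 2 * vlenb ("csrr a3, vlenb; slli a3, a3, 1"), and for
  // nxv128i8 it is 64 * vscale = 8 * vlenb ("slli a3, a3, 3").
  static uint64_t halfNumElts(uint64_t MinNumElts, uint64_t VLenBytes) {
    uint64_t VScale = VLenBytes / 8; // vscale = VLEN/64 = vlenb/8
    return (MinNumElts / 2) * VScale;
  }
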
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; FIXME: We're missing canonicalizations of ISD::VP_SETCC equivalent to those
@@ -537,6 +537,42 @@
   ret <vscale x 1 x i1> %v
 }
 
+declare <vscale x 3 x i1> @llvm.vp.icmp.nxv3i8(<vscale x 3 x i8>, <vscale x 3 x i8>, metadata, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i1> @icmp_eq_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vv_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT:    vmseq.vv v0, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 3 x i1> @llvm.vp.icmp.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, metadata !"eq", <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i1> %v
+}
+
+define <vscale x 3 x i1> @icmp_eq_vx_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vx_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i1> @llvm.vp.icmp.nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, metadata !"eq", <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i1> %v
+}
+
+define <vscale x 3 x i1> @icmp_eq_vx_swap_nxv3i8(<vscale x 3 x i8> %va, i8 %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vx_swap_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vmseq.vx v0, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i1> @llvm.vp.icmp.nxv3i8(<vscale x 3 x i8> %vb, <vscale x 3 x i8> %va, metadata !"eq", <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i1> %v
+}
+
 declare <vscale x 8 x i1> @llvm.vp.icmp.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, metadata, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i1> @icmp_eq_vv_nxv8i7(<vscale x 8 x i7> %va, <vscale x 8 x i7> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
@@ -1117,6 +1153,139 @@
   ret <vscale x 8 x i1> %v
 }
 
+declare <vscale x 128 x i1> @llvm.vp.icmp.nxv128i8(<vscale x 128 x i8>, <vscale x 128 x i8>, metadata, <vscale x 128 x i1>, i32)
+
+define <vscale x 128 x i1> @icmp_eq_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vv_nxv128i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a4, 24
+; CHECK-NEXT:    mul a1, a1, a4
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a4, a0, a1
+; CHECK-NEXT:    vl8r.v v24, (a4)
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    slli a4, a4, 3
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT:    vsetvli a4, zero, e8, m8, ta, mu
+; CHECK-NEXT:    vlm.v v25, (a2)
+; CHECK-NEXT:    sub a4, a3, a1
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 4
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    li a2, 0
+; CHECK-NEXT:    bltu a3, a4, .LBB96_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a2, a4
+; CHECK-NEXT:  .LBB96_2:
+; CHECK-NEXT:    vl8r.v v8, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmseq.vv v1, v16, v8, v0.t
+; CHECK-NEXT:    bltu a3, a1, .LBB96_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a3, a1
+; CHECK-NEXT:  .LBB96_4:
+; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmseq.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmv1r.v v8, v1
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, metadata !"eq", <vscale x 128 x i1> %m, i32 %evl)
+  ret <vscale x 128 x i1> %v
+}
+
+define <vscale x 128 x i1> @icmp_eq_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vx_nxv128i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 3
+; CHECK-NEXT:    mv a4, a2
+; CHECK-NEXT:    bltu a2, a3, .LBB97_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a4, a3
+; CHECK-NEXT:  .LBB97_2:
+; CHECK-NEXT:    li a5, 0
+; CHECK-NEXT:    vsetvli a6, zero, e8, m8, ta, mu
+; CHECK-NEXT:    vlm.v v24, (a1)
+; CHECK-NEXT:    vsetvli zero, a4, e8, m8, ta, ma
+; CHECK-NEXT:    sub a1, a2, a3
+; CHECK-NEXT:    vmseq.vx v25, v8, a0, v0.t
+; CHECK-NEXT:    bltu a2, a1, .LBB97_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a5, a1
+; CHECK-NEXT:  .LBB97_4:
+; CHECK-NEXT:    vsetvli zero, a5, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmseq.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, metadata !"eq", <vscale x 128 x i1> %m, i32 %evl)
+  ret <vscale x 128 x i1> %v
+}
+
+define <vscale x 128 x i1> @icmp_eq_vx_swap_nxv128i8(<vscale x 128 x i8> %va, i8 %b, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vx_swap_nxv128i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 3
+; CHECK-NEXT:    mv a4, a2
+; CHECK-NEXT:    bltu a2, a3, .LBB98_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a4, a3
+; CHECK-NEXT:  .LBB98_2:
+; CHECK-NEXT:    li a5, 0
+; CHECK-NEXT:    vsetvli a6, zero, e8, m8, ta, mu
+; CHECK-NEXT:    vlm.v v24, (a1)
+; CHECK-NEXT:    vsetvli zero, a4, e8, m8, ta, ma
+; CHECK-NEXT:    sub a1, a2, a3
+; CHECK-NEXT:    vmseq.vx v25, v8, a0, v0.t
+; CHECK-NEXT:    bltu a2, a1, .LBB98_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a5, a1
+; CHECK-NEXT:  .LBB98_4:
+; CHECK-NEXT:    vsetvli zero, a5, e8, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmseq.vx v8, v16, a0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v25
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
+  %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i1> @llvm.vp.icmp.nxv128i8(<vscale x 128 x i8> %vb, <vscale x 128 x i8> %va, metadata !"eq", <vscale x 128 x i1> %m, i32 %evl)
+  ret <vscale x 128 x i1> %v
+}
+
 declare <vscale x 1 x i1> @llvm.vp.icmp.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, metadata, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i1> @icmp_eq_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -2222,6 +2391,128 @@
   ret <vscale x 8 x i1> %v
 }
 
+declare <vscale x 32 x i1> @llvm.vp.icmp.nxv32i32(<vscale x 32 x i32>, <vscale x 32 x i32>, metadata, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vv_nxv32i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    vmv1r.v v1, v0
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    srli a1, a3, 2
+; CHECK-NEXT:    vsetvli a5, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    slli a5, a3, 3
+; CHECK-NEXT:    add a5, a0, a5
+; CHECK-NEXT:    vl8re32.v v24, (a5)
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub a5, a2, a3
+; CHECK-NEXT:    vslidedown.vx v0, v0, a1
+; CHECK-NEXT:    bltu a2, a5, .LBB189_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a4, a5
+; CHECK-NEXT:  .LBB189_2:
+; CHECK-NEXT:    vl8re32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
+; CHECK-NEXT:    vmseq.vv v2, v16, v24, v0.t
+; CHECK-NEXT:    bltu a2, a3, .LBB189_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a2, a3
+; CHECK-NEXT:  .LBB189_4:
+; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmseq.vv v16, v24, v8, v0.t
+; CHECK-NEXT:    add a0, a1, a1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vx v16, v2, a1
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, metadata !"eq", <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i1> %v
+}
+
+define <vscale x 32 x i1> @icmp_eq_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vx_nxv32i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    srli a2, a3, 2
+; CHECK-NEXT:    vsetvli a5, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub a5, a1, a3
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-NEXT:    bltu a1, a5, .LBB190_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a4, a5
+; CHECK-NEXT:  .LBB190_2:
+; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
+; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    bltu a1, a3, .LBB190_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a1, a3
+; CHECK-NEXT:  .LBB190_4:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    add a0, a2, a2
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vx v16, v25, a2
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i32> %vb, metadata !"eq", <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i1> %v
+}
+
+define <vscale x 32 x i1> @icmp_eq_vx_swap_nxv32i32(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: icmp_eq_vx_swap_nxv32i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    srli a2, a3, 2
+; CHECK-NEXT:    vsetvli a5, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub a5, a1, a3
+; CHECK-NEXT:    vslidedown.vx v0, v0, a2
+; CHECK-NEXT:    bltu a1, a5, .LBB191_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a4, a5
+; CHECK-NEXT:  .LBB191_2:
+; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
+; CHECK-NEXT:    vmseq.vx v25, v16, a0, v0.t
+; CHECK-NEXT:    bltu a1, a3, .LBB191_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a1, a3
+; CHECK-NEXT:  .LBB191_4:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmseq.vx v16, v8, a0, v0.t
+; CHECK-NEXT:    add a0, a2, a2
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vx v16, v25, a2
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i1> @llvm.vp.icmp.nxv32i32(<vscale x 32 x i32> %vb, <vscale x 32 x i32> %va, metadata !"eq", <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i1> %v
+}
+
 declare <vscale x 1 x i1> @llvm.vp.icmp.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, metadata, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i1> @icmp_eq_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {