diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -130,6 +130,7 @@
 private:
   bool doPeepholeSExtW(SDNode *Node);
   bool doPeepholeMaskedRVV(SDNode *Node);
+  bool doPeepholeMergeVVMFold();
 };

 namespace RISCV {
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -151,6 +151,8 @@

   CurDAG->setRoot(Dummy.getValue());

+  MadeChange |= doPeepholeMergeVVMFold();
+
   if (MadeChange)
     CurDAG->RemoveDeadNodes();
 }
@@ -2511,6 +2513,115 @@
   return true;
 }

+// Try to fold VMERGE_VVM with an unmasked intrinsic into the corresponding
+// masked intrinsic. For now the peephole only handles a VMERGE_VVM that is TU
+// and whose false operand is the same as its merge operand, e.g.
+// (VMERGE_VVM_M1_TU False, False, (VADD_VV_M1 ...), ...) -> (VADD_VV_M1_MASK ...)
+bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
+  bool MadeChange = false;
+  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+
+  while (Position != CurDAG->allnodes_begin()) {
+    SDNode *N = &*--Position;
+    if (N->use_empty() || !N->isMachineOpcode())
+      continue;
+
+    auto IsVMergeTU = [](unsigned Opcode) {
+      return Opcode == RISCV::PseudoVMERGE_VVM_MF8_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_MF4_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_MF2_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M1_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M2_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M4_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M8_TU;
+    };
+
+    unsigned Opc = N->getMachineOpcode();
+    // TODO: Also deal with TA VMerge nodes.
+    if (!IsVMergeTU(Opc))
+      continue;
+
+    SDValue Merge = N->getOperand(0);
+    SDValue False = N->getOperand(1);
+    SDValue True = N->getOperand(2);
+    SDValue Mask = N->getOperand(3);
+    SDValue VL = N->getOperand(4);
+
+    if (Merge != False)
+      continue;
+
+    // N must be the only user of True.
+    if (!True.hasOneUse())
+      continue;
+
+    if (!True.isMachineOpcode())
+      continue;
+
+    unsigned TrueOpc = True.getMachineOpcode();
+
+    // Skip if True has a merge operand.
+    // TODO: Deal with True having the same merge operand as N.
+    if (RISCVII::hasMergeOp(TII->get(TrueOpc).TSFlags))
+      continue;
+
+    // Skip if True has unmodeled side effects.
+    // TODO: Support vleff and vlsegff.
+    if (TII->get(TrueOpc).hasUnmodeledSideEffects())
+      continue;
+
+    // Only deal with True when it is an unmasked intrinsic for now.
+    const RISCV::RISCVMaskedPseudoInfo *Info =
+        RISCV::lookupMaskedIntrinsicByUnmaskedTA(TrueOpc);
+
+    if (!Info)
+      continue;
+
+    // The last operand of an unmasked intrinsic should be SEW or the chain.
+    bool HasChainOp =
+        True.getOperand(True.getNumOperands() - 1).getValueType() == MVT::Other;
+
+    // True must have the same VL as N.
+    unsigned TrueVLIndex = True.getNumOperands() - HasChainOp - 2;
+    SDValue TrueVL = True.getOperand(TrueVLIndex);
+    if (TrueVL != VL)
+      continue;
+
+    SDLoc DL(N);
+    unsigned MaskedOpc = Info->MaskedPseudo;
+    SmallVector<SDValue, 8> Ops;
+    Ops.push_back(Merge);
+    Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex);
+    Ops.append({Mask, VL, /* SEW */ True.getOperand(TrueVLIndex + 1)});
+
+    if (RISCVII::hasVecPolicyOp(TII->get(MaskedOpc).TSFlags))
+      Ops.push_back(
+          CurDAG->getTargetConstant(/* TUMU */ 0, DL, Subtarget->getXLenVT()));
+
+    // The result node should have the chain operand of True.
+    if (HasChainOp)
+      Ops.push_back(True.getOperand(True.getNumOperands() - 1));
+
+    // The result node should take over the glued node of N.
+    if (N->getGluedNode())
+      Ops.push_back(N->getOperand(N->getNumOperands() - 1));
+
+    SDNode *Result =
+        CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
+
+    // Replace the vmerge.vvm node with Result.
+    ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
+
+    // Replace the other result values of True.
+    for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
+      ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
+
+    // Try to transform Result to an unmasked intrinsic.
+    doPeepholeMaskedRVV(Result);
+    MadeChange = true;
+  }
+  return MadeChange;
+}
+
 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
 // for instruction scheduling.
 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -472,6 +472,11 @@
   Pseudo Pseudo = !cast<Pseudo>(NAME);
 }

+def lookupMaskedIntrinsicByUnmaskedTA : SearchIndex {
+  let Table = RISCVMaskedPseudosTable;
+  let Key = ["UnmaskedPseudo"];
+}
+
 def RISCVVLETable : GenericTable {
   let FilterClass = "RISCVVLE";
   let CppTypeName = "VLEPseudo";
@@ -1591,6 +1596,7 @@
                VLESched;
       def "E" # eew # "_V_" # LInfo # "_MASK" :
         VPseudoUSLoadMask,
+        RISCVMaskedPseudo,
         VLESched;
     }
   }
@@ -1611,6 +1617,7 @@
                VLFSched;
      def "E" # eew # "FF_V_" # LInfo # "_MASK":
        VPseudoUSLoadFFMask,
+       RISCVMaskedPseudo,
        VLFSched;
     }
   }
@@ -1635,8 +1642,10 @@
                VLSSched;
       def "E" # eew # "_V_" # LInfo # "_TU": VPseudoSLoadNoMaskTU,
                VLSSched;
-      def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSLoadMask,
-               VLSSched;
+      def "E" # eew # "_V_" # LInfo # "_MASK" :
+        VPseudoSLoadMask,
+        RISCVMaskedPseudo,
+        VLSSched;
     }
   }
 }
@@ -1666,6 +1675,7 @@
                  VLXSched;
         def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" :
           VPseudoILoadMask,
+          RISCVMaskedPseudo,
           VLXSched;
       }
     }
@@ -1785,6 +1795,7 @@
     def "_V_" # m.MX # "_TU": VPseudoNullaryNoMaskTU,
       Sched<[WriteVMIdxV, ReadVMask]>;
     def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask,
+      RISCVMaskedPseudo,
       Sched<[WriteVMIdxV, ReadVMask]>;
   }
 }
@@ -1807,6 +1818,7 @@
     def "_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU,
       Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>;
     def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA,
+      RISCVMaskedPseudo,
       Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>;
   }
 }
@@ -2137,6 +2149,7 @@
     def "_V_" # m.MX # "_TU": VPseudoUnaryNoMaskTU,
       Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>;
     def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA,
+      RISCVMaskedPseudo,
       Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>;
   }
 }
@@ -2150,6 +2163,7 @@
     def "_V_" # m.MX # "_TU": VPseudoUnaryNoMaskTU,
       Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>;
     def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA,
+      RISCVMaskedPseudo,
       Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>;
   }
 }
@@ -2163,6 +2177,7 @@
     def "_V_" # m.MX # "_TU": VPseudoUnaryNoMaskTU,
       Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>;
     def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA,
+      RISCVMaskedPseudo,
       Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>;
   }
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll
@@ -1,5 +1,5 @@
-; NOTE: Assertions have been autogenerated by
utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 | FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 -stop-after=finalize-isel | FileCheck %s --check-prefix=MIR declare <8 x i16> @llvm.vp.merge.nxv2i16(<8 x i1>, <8 x i16>, <8 x i16>, i32) declare <8 x i32> @llvm.vp.merge.nxv2i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) @@ -11,11 +11,22 @@ define <8 x i32> @vpmerge_vpadd(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, <8 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpadd: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vadd.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpadd + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v10, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY4:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVADD_VV_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVADD_VV_M1_MASK [[COPY4]], [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x i32> @llvm.vp.add.nxv2i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl) @@ -29,11 +40,23 @@ ; CHECK-LABEL: vpmerge_vpadd2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vadd.vv v11, v9, v10 ; CHECK-NEXT: vmseq.vv v0, v9, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 +; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpadd2 + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v10, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[COPY2]], [[COPY1]], [[COPY]], 5 /* e32 */ + ; MIR-NEXT: $v0 = COPY [[PseudoVMSEQ_VV_M1_]] + ; MIR-NEXT: [[PseudoVADD_VV_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVADD_VV_M1_MASK [[COPY3]], [[COPY2]], [[COPY1]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x i32> @llvm.vp.add.nxv2i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl) @@ -46,13 +69,20 @@ define <8 x i32> @vpmerge_vpadd3(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpadd3: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vadd.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vadd.vv v8, v9, v10 ; CHECK-NEXT: ret + ; 
MIR-LABEL: name: vpmerge_vpadd3 + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v10, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vr = COPY $v8 + ; MIR-NEXT: [[PseudoVADD_VV_M1_TU:%[0-9]+]]:vr = PseudoVADD_VV_M1_TU [[COPY3]], [[COPY2]], [[COPY1]], [[COPY]], 5 /* e32 */ + ; MIR-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_TU]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x i32> @llvm.vp.add.nxv2i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl) @@ -65,11 +95,22 @@ define <8 x float> @vpmerge_vpfadd(<8 x float> %passthru, <8 x float> %x, <8 x float> %y, <8 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpfadd: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpfadd + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v10, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY4:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVFADD_VV_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVFADD_VV_M1_MASK [[COPY4]], [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0, implicit $frm + ; MIR-NEXT: $v8 = COPY [[PseudoVFADD_VV_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x float> @llvm.vp.fadd.nxv2f32(<8 x float> %x, <8 x float> %y, <8 x i1> %mask, i32 %vl) @@ -82,11 +123,21 @@ define <8 x i16> @vpmerge_vpfptosi(<8 x i16> %passthru, <8 x float> %x, <8 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpfptosi: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpfptosi + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVFNCVT_RTZ_X_F_W_MF2_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 4 /* e16 */, 0 + ; MIR-NEXT: $v8 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<8 x float> %x, <8 x i1> %mask, i32 %vl) @@ -99,11 +150,21 @@ define <8 x float> @vpmerge_vpsitofp(<8 x float> %passthru, <8 x i64> %x, <8 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpsitofp: ; CHECK: # %bb.0: 
-; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpsitofp + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v10m2, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vrm2 = COPY $v10m2 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVFNCVT_F_X_W_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<8 x i64> %x, <8 x i1> %mask, i32 %vl) @@ -116,11 +177,21 @@ define <8 x i32> @vpmerge_vpzext(<8 x i32> %passthru, <8 x i8> %x, <8 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpzext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vzext.vf4 v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vzext.vf4 v8, v9, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpzext + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVZEXT_VF4_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<8 x i8> %x, <8 x i1> %mask, i32 %vl) @@ -133,11 +204,21 @@ define <8 x i32> @vpmerge_vptrunc(<8 x i32> %passthru, <8 x i64> %x, <8 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vptrunc: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vncvt.x.x.w v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vncvt.x.x.w v8, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vptrunc + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v10m2, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vrm2 = COPY $v10m2 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVNSRL_WX_M1_MASK [[COPY3]], [[COPY2]], $x0, $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<8 x i64> %x, <8 x i1> %mask, i32 %vl) @@ -150,11 +231,21 @@ define <8 x double> 
@vpmerge_vpfpext(<8 x double> %passthru, <8 x float> %x, <8 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpfpext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpfpext + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8m2, $v10, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrm2nov0 = COPY $v8m2 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %4:vrm2nov0 = PseudoVFWCVT_F_F_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8m2 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8m2 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<8 x float> %x, <8 x i1> %mask, i32 %vl) @@ -167,11 +258,21 @@ define <8 x float> @vpmerge_vpfptrunc(<8 x float> %passthru, <8 x double> %x, <8 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpfptrunc: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpfptrunc + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v10m2, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vrm2 = COPY $v10m2 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVFNCVT_F_F_W_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<8 x double> %x, <8 x i1> %mask, i32 %vl) @@ -184,11 +285,21 @@ define <8 x i32> @vpmerge_vpload(<8 x i32> %passthru, <8 x i32> * %p, <8 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpload: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpload + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $x10, $v0, $x11 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY [[PseudoVLE32_V_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 
%mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x i32> @llvm.vp.load.nxv2i32.p0nxv2i32(<8 x i32> * %p, <8 x i1> %mask, i32 %vl) @@ -201,11 +312,24 @@ ; CHECK-LABEL: vpmerge_vpload2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vle32.v v11, (a0) ; CHECK-NEXT: vmseq.vv v0, v9, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 +; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpload2 + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $x10, $v9, $v10, $x11 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; MIR-NEXT: [[COPY4:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[COPY2]], [[COPY1]], [[COPY]], 5 /* e32 */ + ; MIR-NEXT: $v0 = COPY [[PseudoVMSEQ_VV_M1_]] + ; MIR-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY4]], [[COPY3]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY [[PseudoVLE32_V_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement <8 x i1> poison, i1 -1, i32 0 %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer %a = call <8 x i32> @llvm.vp.load.nxv2i32.p0nxv2i32(<8 x i32> * %p, <8 x i1> %mask, i32 %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v -stop-after=finalize-isel | FileCheck %s --check-prefix=MIR declare @llvm.vp.merge.nxv2i16(, , , i32) declare @llvm.vp.merge.nxv2i32(, , , i32) @@ -11,11 +11,22 @@ define @vpmerge_vpadd( %passthru, %x, %y, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpadd: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vadd.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpadd + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v10, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY4:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVADD_VV_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVADD_VV_M1_MASK [[COPY4]], [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.add.nxv2i32( %x, %y, %mask, i32 %vl) @@ -29,11 +40,23 @@ ; CHECK-LABEL: vpmerge_vpadd2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vadd.vv v11, v9, v10 ; CHECK-NEXT: vmseq.vv v0, v9, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: 
vmerge.vvm v8, v8, v11, v0 +; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpadd2 + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v10, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[COPY2]], [[COPY1]], [[COPY]], 5 /* e32 */ + ; MIR-NEXT: $v0 = COPY [[PseudoVMSEQ_VV_M1_]] + ; MIR-NEXT: [[PseudoVADD_VV_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVADD_VV_M1_MASK [[COPY3]], [[COPY2]], [[COPY1]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.add.nxv2i32( %x, %y, %mask, i32 %vl) @@ -42,17 +65,24 @@ ret %b } -; Test vp.merge have all-ones mask. +; Test vp.merge has all-ones mask. define @vpmerge_vpadd3( %passthru, %x, %y, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpadd3: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmset.m v0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vadd.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vadd.vv v8, v9, v10 ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpadd3 + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v10, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vr = COPY $v8 + ; MIR-NEXT: [[PseudoVADD_VV_M1_TU:%[0-9]+]]:vr = PseudoVADD_VV_M1_TU [[COPY3]], [[COPY2]], [[COPY1]], [[COPY]], 5 /* e32 */ + ; MIR-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_TU]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.add.nxv2i32( %x, %y, %mask, i32 %vl) @@ -65,11 +95,22 @@ define @vpmerge_vpfadd( %passthru, %x, %y, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpfadd: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vv v9, v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpfadd + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v10, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY4:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVFADD_VV_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVFADD_VV_M1_MASK [[COPY4]], [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0, implicit $frm + ; MIR-NEXT: $v8 = COPY [[PseudoVFADD_VV_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.fadd.nxv2f32( %x, %y, %mask, i32 %vl) @@ -85,6 +126,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v9, v10 ; 
CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vrgatherei16 + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v9, $v10, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: early-clobber %5:vr = PseudoVRGATHEREI16_VV_M1_MF2 [[COPY2]], [[COPY1]], [[COPY]], 5 /* e32 */ + ; MIR-NEXT: $v8 = COPY %5 + ; MIR-NEXT: PseudoRET implicit $v8 %1 = zext i32 %vl to i64 %2 = tail call @llvm.riscv.vrgatherei16.vv.nxv2i32.i64( undef, %x, %y, i64 %1) %3 = tail call @llvm.vp.merge.nxv2i32( %m, %2, %passthru, i32 %vl) @@ -96,11 +147,21 @@ define @vpmerge_vpfptosi( %passthru, %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpfptosi: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpfptosi + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVFNCVT_RTZ_X_F_W_MF2_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 4 /* e16 */, 0 + ; MIR-NEXT: $v8 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.fptosi.nxv2i16.nxv2f32( %x, %mask, i32 %vl) @@ -113,11 +174,21 @@ define @vpmerge_vpsitofp( %passthru, %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpsitofp: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.f.x.w v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpsitofp + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v10m2, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vrm2 = COPY $v10m2 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVFNCVT_F_X_W_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.sitofp.nxv2f32.nxv2i64( %x, %mask, i32 %vl) @@ -130,11 +201,21 @@ define @vpmerge_vpzext( %passthru, %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpzext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vzext.vf4 v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vzext.vf4 v8, v9, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpzext + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + 
; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVZEXT_VF4_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.zext.nxv2i32.nxv2i8( %x, %mask, i32 %vl) @@ -147,11 +228,21 @@ define @vpmerge_vptrunc( %passthru, %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vptrunc: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vncvt.x.x.w v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vncvt.x.x.w v8, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vptrunc + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v10m2, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vrm2 = COPY $v10m2 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVNSRL_WX_M1_MASK [[COPY3]], [[COPY2]], $x0, $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.trunc.nxv2i32.nxv2i64( %x, %mask, i32 %vl) @@ -164,11 +255,21 @@ define @vpmerge_vpfpext( %passthru, %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpfpext: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfwcvt.f.f.v v12, v10 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpfpext + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8m2, $v10, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrm2nov0 = COPY $v8m2 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %4:vrm2nov0 = PseudoVFWCVT_F_F_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8m2 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8m2 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.fpext.nxv2f64.nxv2f32( %x, %mask, i32 %vl) @@ -181,11 +282,21 @@ define @vpmerge_vpfptrunc( %passthru, %x, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpfptrunc: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfncvt.f.f.w v9, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpfptrunc + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v10m2, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vrm2 = COPY $v10m2 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: 
early-clobber %4:vrnov0 = PseudoVFNCVT_F_F_W_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.fptrunc.nxv2f32.nxv2f64( %x, %mask, i32 %vl) @@ -198,11 +309,21 @@ define @vpmerge_vpload( %passthru, * %p, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpload: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpload + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $x10, $v0, $x11 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY [[PseudoVLE32_V_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.load.nxv2i32.p0nxv2i32( * %p, %mask, i32 %vl) @@ -210,16 +331,29 @@ ret %b } -; Test result have chain and glued node. +; Test result has chain and glued node. define @vpmerge_vpload2( %passthru, * %p, %x, %y, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vpload2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vle32.v v11, (a0) ; CHECK-NEXT: vmseq.vv v0, v9, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v11, v0 +; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpload2 + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $x10, $v9, $v10, $x11 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v10 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; MIR-NEXT: [[COPY4:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[COPY2]], [[COPY1]], [[COPY]], 5 /* e32 */ + ; MIR-NEXT: $v0 = COPY [[PseudoVMSEQ_VV_M1_]] + ; MIR-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY4]], [[COPY3]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY [[PseudoVLE32_V_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %splat = insertelement poison, i1 -1, i32 0 %mask = shufflevector %splat, poison, zeroinitializer %a = call @llvm.vp.load.nxv2i32.p0nxv2i32( * %p, %mask, i32 %vl) @@ -228,6 +362,34 @@ ret %b } +; Test result has chain output of true operand of merge.vvm. 
+define void @vpmerge_vpload_store( %passthru, * %p, %m, i32 zeroext %vl) { +; CHECK-LABEL: vpmerge_vpload_store: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: vs1r.v v8, (a0) +; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vpload_store + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $x10, $v0, $x11 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store unknown-size into %ir.p, align 8) + ; MIR-NEXT: PseudoRET + %splat = insertelement poison, i1 -1, i32 0 + %mask = shufflevector %splat, poison, zeroinitializer + %a = call @llvm.vp.load.nxv2i32.p0nxv2i32( * %p, %mask, i32 %vl) + %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) + store %b, * %p + ret void +} + ; FIXME: Merge vmerge.vvm and vleffN.v declare { , i64 } @llvm.riscv.vleff.nxv2i32(, *, i64) define @vpmerge_vleff( %passthru, * %p, %m, i32 zeroext %vl) { @@ -238,6 +400,19 @@ ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vleff + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $x10, $v0, $x11 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: [[PseudoVLE32FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE32FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE32FF_V_M1 [[COPY2]], [[COPY]], 5 /* e32 */, implicit-def dead $vl + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVMERGE_VVM_M1_TU:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1_TU [[COPY3]], [[COPY3]], killed [[PseudoVLE32FF_V_M1_]], $v0, [[COPY]], 5 /* e32 */ + ; MIR-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_TU]] + ; MIR-NEXT: PseudoRET implicit $v8 %1 = zext i32 %vl to i64 %a = call { , i64 } @llvm.riscv.vleff.nxv2i32( undef, * %p, i64 %1) %b = extractvalue { , i64 } %a, 0 @@ -250,11 +425,22 @@ define @vpmerge_vlse( %passthru, * %p, %m, i64 %s, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vlse: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu -; CHECK-NEXT: vlse32.v v9, (a0), a1 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu +; CHECK-NEXT: vlse32.v v8, (a0), a1, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vlse + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $x10, $v0, $x11, $x12 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; MIR-NEXT: [[COPY4:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY2]] + ; MIR-NEXT: [[PseudoVLSE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLSE32_V_M1_MASK [[COPY4]], [[COPY3]], [[COPY1]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY [[PseudoVLSE32_V_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vlse.nxv2i32( undef, * %p, i64 %s, i64 %1) %b = call @llvm.vp.merge.nxv2i32( %m, %a, 
%passthru, i32 %vl) @@ -266,11 +452,22 @@ define @vpmerge_vluxei( %passthru, * %p, %idx, %m, i64 %s, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vluxei: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu -; CHECK-NEXT: vluxei64.v v9, (a0), v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu +; CHECK-NEXT: vluxei64.v v8, (a0), v10, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vluxei + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $x10, $v10m2, $v0, $x12 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vrm2 = COPY $v10m2 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; MIR-NEXT: [[COPY4:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: early-clobber %6:vrnov0 = PseudoVLUXEI64_V_M2_M1_MASK [[COPY4]], [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY %6 + ; MIR-NEXT: PseudoRET implicit $v8 %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vluxei.nxv2i32.nxv2i64( undef, * %p, %idx, i64 %1) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) @@ -282,11 +479,20 @@ define @vpmerge_vid( %passthru, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vid: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vid.v v8, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vid + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVID_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVID_V_M1_MASK [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY [[PseudoVID_V_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vid.nxv2i32( undef, i64 %1) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) @@ -298,11 +504,21 @@ define @vpmerge_viota( %passthru, %m, %vm, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_viota: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: viota.m v10, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: viota.m v8, v9, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_viota + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v0, $v9, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY2]] + ; MIR-NEXT: early-clobber %4:vrnov0 = PseudoVIOTA_M_M1_MASK [[COPY3]], [[COPY1]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY %4 + ; MIR-NEXT: PseudoRET implicit $v8 %1 = zext i32 %vl to i64 %a = call @llvm.riscv.viota.nxv2i32( undef, %vm, i64 %1) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) @@ -314,11 +530,21 @@ define @vpmerge_vflcass( %passthru, %vf, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vflcass: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: 
vfclass.v v9, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfclass.v v8, v9, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vflcass + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVFCLASS_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVFCLASS_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 + ; MIR-NEXT: $v8 = COPY [[PseudoVFCLASS_V_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vfclass.nxv2i32( undef, %vf, i64 %1) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) @@ -330,11 +556,21 @@ define @vpmerge_vfsqrt( %passthru, %vf, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vfsqrt: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfsqrt.v v9, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfsqrt.v v8, v9, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vfsqrt + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVFSQRT_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVFSQRT_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0, implicit $frm + ; MIR-NEXT: $v8 = COPY [[PseudoVFSQRT_V_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vfsqrt.nxv2f32( undef, %vf, i64 %1) %b = call @llvm.vp.merge.nxv2f32( %m, %a, %passthru, i32 %vl) @@ -346,11 +582,21 @@ define @vpmerge_vfrec7( %passthru, %vf, %m, i32 zeroext %vl) { ; CHECK-LABEL: vpmerge_vfrec7: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vfrec7.v v9, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu -; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfrec7.v v8, v9, v0.t ; CHECK-NEXT: ret + ; MIR-LABEL: name: vpmerge_vfrec7 + ; MIR: bb.0 (%ir-block.0): + ; MIR-NEXT: liveins: $v8, $v9, $v0, $x10 + ; MIR-NEXT: {{ $}} + ; MIR-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; MIR-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 + ; MIR-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v9 + ; MIR-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 + ; MIR-NEXT: $v0 = COPY [[COPY1]] + ; MIR-NEXT: [[PseudoVFREC7_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVFREC7_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0, implicit $frm + ; MIR-NEXT: $v8 = COPY [[PseudoVFREC7_V_M1_MASK]] + ; MIR-NEXT: PseudoRET implicit $v8 %1 = zext i32 %vl to i64 %a = call @llvm.riscv.vfrec7.nxv2f32( undef, %vf, i64 %1) %b = call @llvm.vp.merge.nxv2f32( %m, %a, %passthru, i32 %vl)