diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -134,7 +134,7 @@
   bool doPeepholeMaskedRVV(SDNode *Node);
   bool doPeepholeMergeVVMFold();
   bool performVMergeToVAdd(SDNode *N);
-  bool performCombineVMergeAndVOps(SDNode *N);
+  bool performCombineVMergeAndVOps(SDNode *N, bool IsTA);
 };
 
 namespace RISCV {
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2630,11 +2630,14 @@
 // peephole only deals with VMERGE_VVM which is TU and has false operand same as
 // its true operand now. E.g. (VMERGE_VVM_M1_TU False, False, (VADD_M1 ...),
 // ...) -> (VADD_VV_M1_MASK)
-bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
-  SDValue Merge = N->getOperand(0);
-  SDValue True = N->getOperand(2);
-  SDValue Mask = N->getOperand(3);
-  SDValue VL = N->getOperand(4);
+bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) {
+  unsigned Offset = IsTA ? 0 : 1;
+  uint64_t Policy = IsTA ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
+
+  SDValue False = N->getOperand(0 + Offset);
+  SDValue True = N->getOperand(1 + Offset);
+  SDValue Mask = N->getOperand(2 + Offset);
+  SDValue VL = N->getOperand(3 + Offset);
 
   assert(True.getResNo() == 0 &&
          "Expect True is the first output of an instruction.");
@@ -2688,13 +2691,14 @@
   unsigned MaskedOpc = Info->MaskedPseudo;
   assert(RISCVII::hasVecPolicyOp(TII->get(MaskedOpc).TSFlags) &&
          "Expected instructions with mask have policy operand.");
+  assert(RISCVII::hasMergeOp(TII->get(MaskedOpc).TSFlags) &&
+         "Expected instructions with mask have merge operand.");
 
   SmallVector<SDValue, 8> Ops;
-  Ops.push_back(Merge);
+  Ops.push_back(False);
   Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex);
   Ops.append({Mask, VL, /* SEW */ True.getOperand(TrueVLIndex + 1)});
-  Ops.push_back(
-      CurDAG->getTargetConstant(/* TUMU */ 0, DL, Subtarget->getXLenVT()));
+  Ops.push_back(CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()));
 
   // Result node should have chain operand of True.
   if (HasChainOp)
@@ -2782,15 +2786,24 @@
              Opcode == RISCV::PseudoVMERGE_VVM_M8_TU;
     };
 
+    auto IsVMergeTA = [](unsigned Opcode) {
+      return Opcode == RISCV::PseudoVMERGE_VVM_MF8 ||
+             Opcode == RISCV::PseudoVMERGE_VVM_MF4 ||
+             Opcode == RISCV::PseudoVMERGE_VVM_MF2 ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M1 ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M2 ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M4 ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M8;
+    };
+
     unsigned Opc = N->getMachineOpcode();
     // The following optimizations require that the merge operand of N is same
     // as the false operand of N.
-    // TODO: Also deal with TA VMerge nodes.
-    if (!IsVMergeTU(Opc) || N->getOperand(0) != N->getOperand(1))
-      continue;
-
-    MadeChange |= performCombineVMergeAndVOps(N);
-    MadeChange |= performVMergeToVAdd(N);
+    if ((IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) ||
+        IsVMergeTA(Opc))
+      MadeChange |= performCombineVMergeAndVOps(N, IsVMergeTA(Opc));
+    if (IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1))
+      MadeChange |= performVMergeToVAdd(N);
   }
   return MadeChange;
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll
@@ -200,8 +200,7 @@
 ; CHECK-LABEL: vpselect_vpadd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -216,8 +215,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmseq.vv v0, v9, v10
-; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -231,11 +229,8 @@
 define <8 x i32> @vpselect_vpadd3(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_vpadd3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
-; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vadd.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -249,8 +244,7 @@
 ; CHECK-LABEL: vpselect_vpfadd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vv v9, v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -264,8 +258,7 @@
 ; CHECK-LABEL: vpselect_vpfptosi:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v9
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -279,8 +272,7 @@
 ; CHECK-LABEL: vpselect_vpsitofp:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -294,8 +286,7 @@
 ; CHECK-LABEL: vpselect_vpzext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vzext.vf4 v10, v9
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -309,8 +300,7 @@
 ; CHECK-LABEL: vpselect_vptrunc:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v9, v10, 0
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -324,9 +314,7 @@
 ; CHECK-LABEL: vpselect_vpfpext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v10
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -340,8 +328,7 @@
 ; CHECK-LABEL: vpselect_vpfptrunc:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -355,8 +342,7 @@
 ; CHECK-LABEL: vpselect_vpload:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -370,9 +356,8 @@
 ; CHECK-LABEL: vpselect_vpload2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v11, (a0)
 ; CHECK-NEXT:    vmseq.vv v0, v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <8 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -6,10 +6,9 @@
 ; CHECK-LABEL: vselect_vv_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vlm.v v0, (a2)
-; CHECK-NEXT:    vle32.v v10, (a1)
-; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    vse32.v v8, (a3)
 ; CHECK-NEXT:    ret
   %va = load <8 x i32>, <8 x i32>* %a
@@ -60,10 +59,9 @@
 ; CHECK-LABEL: vselect_vv_v8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vlm.v v0, (a2)
-; CHECK-NEXT:    vle32.v v10, (a1)
-; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    vse32.v v8, (a3)
 ; CHECK-NEXT:    ret
   %va = load <8 x float>, <8 x float>* %a
@@ -114,10 +112,9 @@
 ; CHECK-LABEL: vselect_vv_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    vlm.v v0, (a2)
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    vle16.v v8, (a1)
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
 ; CHECK-NEXT:    vse16.v v8, (a3)
 ; CHECK-NEXT:    ret
   %va = load <16 x i16>, <16 x i16>* %a
@@ -169,10 +166,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a4, 32
 ; CHECK-NEXT:    vsetvli zero, a4, e16, m4, ta, mu
-; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    vlm.v v0, (a2)
-; CHECK-NEXT:    vle16.v v12, (a1)
-; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    vle16.v v8, (a1)
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
 ; CHECK-NEXT:    vse16.v v8, (a3)
 ; CHECK-NEXT:    ret
   %va = load <32 x half>, <32 x half>* %a
diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll
@@ -210,20 +210,14 @@
 ; CHECK32-NEXT:    sw a0, 12(sp)
 ; CHECK32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
 ; CHECK32-NEXT:    vmfge.vf v9, v8, ft0
-; CHECK32-NEXT:    addi a0, sp, 8
-; CHECK32-NEXT:    vlse64.v v10, (a0), zero
 ; CHECK32-NEXT:    vmnot.m v0, v9
-; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v12, v8
-; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK32-NEXT:    vmerge.vvm v10, v12, v10, v0
-; CHECK32-NEXT:    lui a1, %hi(.LCPI5_1)
-; CHECK32-NEXT:    flw ft0, %lo(.LCPI5_1)(a1)
-; CHECK32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK32-NEXT:    vlse64.v v12, (a0), zero
+; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v10, v8
+; CHECK32-NEXT:    lui a0, %hi(.LCPI5_1)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI5_1)(a0)
+; CHECK32-NEXT:    addi a0, sp, 8
+; CHECK32-NEXT:    vlse64.v v10, (a0), zero, v0.t
 ; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
-; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK32-NEXT:    vmerge.vvm v10, v10, v12, v0
-; CHECK32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; CHECK32-NEXT:    vlse64.v v10, (a0), zero, v0.t
 ; CHECK32-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; CHECK32-NEXT:    vmerge.vim v8, v10, 0, v0
@@ -274,20 +268,14 @@
 ; CHECK32-NEXT:    sw a0, 12(sp)
 ; CHECK32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK32-NEXT:    vmfge.vf v10, v8, ft0
-; CHECK32-NEXT:    addi a0, sp, 8
-; CHECK32-NEXT:    vlse64.v v12, (a0), zero
 ; CHECK32-NEXT:    vmnot.m v0, v10
-; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v16, v8
-; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK32-NEXT:    vmerge.vvm v12, v16, v12, v0
-; CHECK32-NEXT:    lui a1, %hi(.LCPI6_1)
-; CHECK32-NEXT:    flw ft0, %lo(.LCPI6_1)(a1)
-; CHECK32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK32-NEXT:    vlse64.v v16, (a0), zero
+; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v12, v8
+; CHECK32-NEXT:    lui a0, %hi(.LCPI6_1)
+; CHECK32-NEXT:    flw ft0, %lo(.LCPI6_1)(a0)
+; CHECK32-NEXT:    addi a0, sp, 8
+; CHECK32-NEXT:    vlse64.v v12, (a0), zero, v0.t
 ; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
-; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK32-NEXT:    vmerge.vvm v12, v12, v16, v0
-; CHECK32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
+; CHECK32-NEXT:    vlse64.v v12, (a0), zero, v0.t
 ; CHECK32-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; CHECK32-NEXT:    vmerge.vim v8, v12, 0, v0
@@ -458,18 +446,15 @@
 ; CHECK32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
 ; CHECK32-NEXT:    vmfge.vf v10, v8, ft0
 ; CHECK32-NEXT:    vmnot.m v0, v10
+; CHECK32-NEXT:    vfcvt.rtz.x.f.v v10, v8
+; CHECK32-NEXT:    lui a0, %hi(.LCPI12_1)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI12_1)(a0)
 ; CHECK32-NEXT:    addi a0, sp, 8
-; CHECK32-NEXT:    vlse64.v v10, (a0), zero
-; CHECK32-NEXT:    lui a1, %hi(.LCPI12_1)
-; CHECK32-NEXT:    fld ft0, %lo(.LCPI12_1)(a1)
-; CHECK32-NEXT:    vfcvt.rtz.x.f.v v12, v8
-; CHECK32-NEXT:    vlse64.v v14, (a0), zero
-; CHECK32-NEXT:    vmerge.vvm v12, v12, v10, v0
+; CHECK32-NEXT:    vlse64.v v10, (a0), zero, v0.t
 ; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
-; CHECK32-NEXT:    vmfne.vv v10, v8, v8
-; CHECK32-NEXT:    vmerge.vvm v8, v12, v14, v0
-; CHECK32-NEXT:    vmv1r.v v0, v10
-; CHECK32-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT:    vlse64.v v10, (a0), zero, v0.t
+; CHECK32-NEXT:    vmfne.vv v0, v8, v8
+; CHECK32-NEXT:    vmerge.vim v8, v10, 0, v0
 ; CHECK32-NEXT:    addi sp, sp, 16
 ; CHECK32-NEXT:    ret
 ;
@@ -514,18 +499,15 @@
 ; CHECK32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
 ; CHECK32-NEXT:    vmfge.vf v12, v8, ft0
 ; CHECK32-NEXT:    vmnot.m v0, v12
+; CHECK32-NEXT:    vfcvt.rtz.x.f.v v12, v8
+; CHECK32-NEXT:    lui a0, %hi(.LCPI13_1)
+; CHECK32-NEXT:    fld ft0, %lo(.LCPI13_1)(a0)
 ; CHECK32-NEXT:    addi a0, sp, 8
-; CHECK32-NEXT:    vlse64.v v12, (a0), zero
-; CHECK32-NEXT:    lui a1, %hi(.LCPI13_1)
-; CHECK32-NEXT:    fld ft0, %lo(.LCPI13_1)(a1)
-; CHECK32-NEXT:    vfcvt.rtz.x.f.v v16, v8
-; CHECK32-NEXT:    vlse64.v v20, (a0), zero
-; CHECK32-NEXT:    vmerge.vvm v16, v16, v12, v0
+; CHECK32-NEXT:    vlse64.v v12, (a0), zero, v0.t
 ; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
-; CHECK32-NEXT:    vmfne.vv v12, v8, v8
-; CHECK32-NEXT:    vmerge.vvm v8, v16, v20, v0
-; CHECK32-NEXT:    vmv1r.v v0, v12
-; CHECK32-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK32-NEXT:    vlse64.v v12, (a0), zero, v0.t
+; CHECK32-NEXT:    vmfne.vv v0, v8, v8
+; CHECK32-NEXT:    vmerge.vim v8, v12, 0, v0
 ; CHECK32-NEXT:    addi sp, sp, 16
 ; CHECK32-NEXT:    ret
 ;
@@ -831,21 +813,16 @@
 ; CHECK32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
 ; CHECK32-NEXT:    vmfge.vf v9, v8, ft0
 ; CHECK32-NEXT:    vmnot.m v0, v9
-; CHECK32-NEXT:    addi a0, sp, 8
-; CHECK32-NEXT:    vlse64.v v10, (a0), zero
 ; CHECK32-NEXT:    vfwcvt.f.f.v v9, v8
 ; CHECK32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v12, v9
-; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK32-NEXT:    vmerge.vvm v10, v12, v10, v0
-; CHECK32-NEXT:    lui a1, %hi(.LCPI19_1)
-; CHECK32-NEXT:    flh ft0, %lo(.LCPI19_1)(a1)
+; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v10, v9
+; CHECK32-NEXT:    lui a0, %hi(.LCPI19_1)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI19_1)(a0)
+; CHECK32-NEXT:    addi a0, sp, 8
+; CHECK32-NEXT:    vlse64.v v10, (a0), zero, v0.t
 ; CHECK32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK32-NEXT:    vlse64.v v12, (a0), zero
 ; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
-; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK32-NEXT:    vmerge.vvm v10, v10, v12, v0
-; CHECK32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK32-NEXT:    vlse64.v v10, (a0), zero, v0.t
 ; CHECK32-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK32-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; CHECK32-NEXT:    vmerge.vim v8, v10, 0, v0
@@ -899,21 +876,16 @@
 ; CHECK32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
 ; CHECK32-NEXT:    vmfge.vf v9, v8, ft0
 ; CHECK32-NEXT:    vmnot.m v0, v9
-; CHECK32-NEXT:    addi a0, sp, 8
-; CHECK32-NEXT:    vlse64.v v12, (a0), zero
 ; CHECK32-NEXT:    vfwcvt.f.f.v v10, v8
 ; CHECK32-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v16, v10
-; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK32-NEXT:    vmerge.vvm v12, v16, v12, v0
-; CHECK32-NEXT:    lui a1, %hi(.LCPI20_1)
-; CHECK32-NEXT:    flh ft0, %lo(.LCPI20_1)(a1)
+; CHECK32-NEXT:    vfwcvt.rtz.x.f.v v12, v10
+; CHECK32-NEXT:    lui a0, %hi(.LCPI20_1)
+; CHECK32-NEXT:    flh ft0, %lo(.LCPI20_1)(a0)
+; CHECK32-NEXT:    addi a0, sp, 8
+; CHECK32-NEXT:    vlse64.v v12, (a0), zero, v0.t
 ; CHECK32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK32-NEXT:    vlse64.v v16, (a0), zero
 ; CHECK32-NEXT:    vmfgt.vf v0, v8, ft0
-; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; CHECK32-NEXT:    vmerge.vvm v12, v12, v16, v0
-; CHECK32-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
+; CHECK32-NEXT:    vlse64.v v12, (a0), zero, v0.t
 ; CHECK32-NEXT:    vmfne.vv v0, v8, v8
 ; CHECK32-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; CHECK32-NEXT:    vmerge.vim v8, v12, 0, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
@@ -35,11 +35,10 @@
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vr = COPY $v0
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $x10
-  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vr = COPY $v8
-  ; CHECK-NEXT:   [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 [[COPY2]], [[COPY]], 5 /* e32 */ :: (load unknown-size from %ir.p, align 8)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
   ; CHECK-NEXT:   $v0 = COPY [[COPY1]]
-  ; CHECK-NEXT:   [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[COPY3]], killed [[PseudoVLE32_V_M1_]], $v0, [[COPY]], 5 /* e32 */
-  ; CHECK-NEXT:   VS1R_V killed [[PseudoVMERGE_VVM_M1_]], [[COPY2]] :: (store unknown-size into %ir.p, align 8)
+  ; CHECK-NEXT:   [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 1 /* ta, mu */
+  ; CHECK-NEXT:   VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store unknown-size into %ir.p, align 8)
   ; CHECK-NEXT:   PseudoRET
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -443,8 +443,7 @@
 ; CHECK-LABEL: vpselect_vpadd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -459,8 +458,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vmseq.vv v0, v9, v10
-; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -474,11 +472,8 @@
 define <vscale x 2 x i32> @vpselect_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_vpadd3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vadd.vv v9, v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vadd.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -492,8 +487,7 @@
 ; CHECK-LABEL: vpselect_vpfadd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vv v9, v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -520,8 +514,7 @@
 ; CHECK-LABEL: vpselect_vpfptosi:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v9
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -535,8 +528,7 @@
 ; CHECK-LABEL: vpselect_vpsitofp:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -550,8 +542,7 @@
 ; CHECK-LABEL: vpselect_vpzext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vzext.vf4 v10, v9
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -565,8 +556,7 @@
 ; CHECK-LABEL: vpselect_vptrunc:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v9, v10, 0
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -580,9 +570,7 @@
 ; CHECK-LABEL: vpselect_vpfpext:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v10
-; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -596,8 +584,7 @@
 ; CHECK-LABEL: vpselect_vpfptrunc:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -611,8 +598,7 @@
 ; CHECK-LABEL: vpselect_vpload:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -626,9 +612,8 @@
 ; CHECK-LABEL: vpselect_vpload2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v11, (a0)
 ; CHECK-NEXT:    vmseq.vv v0, v9, v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -643,8 +628,7 @@
 ; CHECK-LABEL: vpselect_vpload_store:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vle32.v v9, (a0)
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
@@ -676,8 +660,7 @@
 ; CHECK-LABEL: vpselect_vlse:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
-; CHECK-NEXT:    vlse32.v v9, (a0), a1
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %p, i64 %s, i64 %1)
@@ -690,8 +673,7 @@
 ; CHECK-LABEL: vpselect_vluxei:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
-; CHECK-NEXT:    vluxei64.v v9, (a0), v10
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vluxei64.v v8, (a0), v10, v0.t
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %p, <vscale x 2 x i64> %idx, i64 %1)
@@ -704,8 +686,7 @@
 ; CHECK-LABEL: vpselect_vid:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vid.v v8, v0.t
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32> undef, i64 %1)
@@ -718,8 +699,7 @@
 ; CHECK-LABEL: vpselect_viota:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    viota.m v10, v9
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    viota.m v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
@@ -732,8 +712,7 @@
 ; CHECK-LABEL: vpselect_vflcass:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfclass.v v9, v9
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfclass.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x float> %vf, i64 %1)
@@ -746,8 +725,7 @@
 ; CHECK-LABEL: vpselect_vfsqrt:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsqrt.v v9, v9
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 %1)
@@ -760,8 +738,7 @@
 ; CHECK-LABEL: vpselect_vfrec7:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfrec7.v v9, v9
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 %1)
@@ -775,10 +752,8 @@
 define <vscale x 2 x i32> @vpselect_add(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_add:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vadd.vv v9, v9, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = add <vscale x 2 x i32> %x, %y
   %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
@@ -789,10 +764,8 @@
 define <vscale x 2 x float> @vpselect_fadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_fadd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vv v9, v9, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = fadd <vscale x 2 x float> %x, %y
   %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
@@ -803,10 +776,8 @@
 define <vscale x 2 x i16> @vpselect_fptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_fptosi:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %a = fptosi <vscale x 2 x float> %x to <vscale x 2 x i16>
   %b = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
@@ -817,10 +788,8 @@
 define <vscale x 2 x float> @vpselect_sitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_sitofp:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v9, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = sitofp <vscale x 2 x i64> %x to <vscale x 2 x float>
   %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
@@ -831,10 +800,8 @@
 define <vscale x 2 x double> @vpselect_fpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_fpext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v10
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = fpext <vscale x 2 x float> %x to <vscale x 2 x double>
   %b = call <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
@@ -845,10 +812,8 @@
 define <vscale x 2 x float> @vpselect_fptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_fptrunc:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v9, v10
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
 ; CHECK-NEXT:    ret
   %a = fptrunc <vscale x 2 x double> %x to <vscale x 2 x float>
   %b = call <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
@@ -859,10 +824,8 @@
 define <vscale x 2 x i32> @vpselect_zext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_zext:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vzext.vf4 v10, v9
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vzext.vf4 v8, v9, v0.t
 ; CHECK-NEXT:    ret
   %a = zext <vscale x 2 x i8> %x to <vscale x 2 x i32>
   %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
@@ -873,10 +836,8 @@
 define <vscale x 2 x i32> @vpselect_trunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_trunc:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v9, v10, 0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vnsrl.wi v8, v10, 0, v0.t
 ; CHECK-NEXT:    ret
   %a = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
   %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll
@@ -244,11 +244,10 @@
 ; CHECK-NEXT:    sw a1, 12(sp)
 ; CHECK-NEXT:    sw a0, 8(sp)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vmseq.vi v0, v9, 0
 ; CHECK-NEXT:    addi a0, sp, 8
-; CHECK-NEXT:    vlse64.v v9, (a0), zero
-; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vmseq.vi v0, v10, 0
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vlse64.v v8, (a0), zero, v0.t
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
@@ -265,11 +264,10 @@
 ; CHECK-NEXT:    sw a1, 12(sp)
 ; CHECK-NEXT:    sw a0, 8(sp)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT:    vid.v v10
+; CHECK-NEXT:    vmseq.vi v0, v10, 0
 ; CHECK-NEXT:    addi a0, sp, 8
-; CHECK-NEXT:    vlse64.v v10, (a0), zero
-; CHECK-NEXT:    vid.v v12
-; CHECK-NEXT:    vmseq.vi v0, v12, 0
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vlse64.v v8, (a0), zero, v0.t
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
@@ -286,11 +284,10 @@
 ; CHECK-NEXT:    sw a1, 12(sp)
 ; CHECK-NEXT:    sw a0, 8(sp)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT:    vid.v v12
+; CHECK-NEXT:    vmseq.vi v0, v12, 0
 ; CHECK-NEXT:    addi a0, sp, 8
-; CHECK-NEXT:    vlse64.v v12, (a0), zero
-; CHECK-NEXT:    vid.v v16
-; CHECK-NEXT:    vmseq.vi v0, v16, 0
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vlse64.v v8, (a0), zero, v0.t
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
@@ -307,11 +304,10 @@
 ; CHECK-NEXT:    sw a1, 12(sp)
 ; CHECK-NEXT:    sw a0, 8(sp)
 ; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; CHECK-NEXT:    vid.v v16
+; CHECK-NEXT:    vmseq.vi v0, v16, 0
 ; CHECK-NEXT:    addi a0, sp, 8
-; CHECK-NEXT:    vlse64.v v16, (a0), zero
-; CHECK-NEXT:    vid.v v24
-; CHECK-NEXT:    vmseq.vi v0, v24, 0
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    vlse64.v v8, (a0), zero, v0.t
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll
@@ -666,8 +666,7 @@
 ; CHECK-NEXT:    sw a0, 8(sp)
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vlse64.v v9, (a0), zero
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    vlse64.v v8, (a0), zero, v0.t
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
@@ -707,8 +706,7 @@
 ; CHECK-NEXT:    sw a0, 8(sp)
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
-; CHECK-NEXT:    vlse64.v v10, (a0), zero
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vlse64.v v8, (a0), zero, v0.t
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
@@ -748,8 +746,7 @@
 ; CHECK-NEXT:    sw a0, 8(sp)
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
-; CHECK-NEXT:    vlse64.v v12, (a0), zero
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    vlse64.v v8, (a0), zero, v0.t
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
@@ -789,8 +786,7 @@
 ; CHECK-NEXT:    sw a0, 8(sp)
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; CHECK-NEXT:    vlse64.v v16, (a0), zero
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    vlse64.v v8, (a0), zero, v0.t
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0