Index: llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -182,7 +182,7 @@
   bool doPeepholeMaskedRVV(SDNode *Node);
   bool doPeepholeMergeVVMFold();
   bool performVMergeToVAdd(SDNode *N);
-  bool performCombineVMergeAndVOps(SDNode *N, bool IsTA);
+  bool performCombineVMergeAndVOps(SDNode *N);
 };
 
 namespace RISCV {
Index: llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3215,6 +3215,11 @@
   return true;
 }
 
+static bool isImplicitDef(SDValue V) {
+  return V.isMachineOpcode() &&
+         V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
+}
+
 // Try to fold away VMERGE_VVM instructions. We handle these cases:
 // -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction
 //  folds to a masked TU instruction. VMERGE_VVM must have have merge operand
@@ -3224,14 +3229,16 @@
 // -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to
 //  masked TU instruction. Both instructions must have the same merge operand.
 //  VMERGE_VVM must have have merge operand same as false operand.
-bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) {
-  unsigned Offset = IsTA ? 0 : 1;
+// Note: The VMERGE_VVM is always in the _TU pseudo form; the TA vs TU
+// distinction above depends solely on whether the passthrough is implicit_def.
+bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
+  bool IsTA = isImplicitDef(N->getOperand(0));
   uint64_t Policy = IsTA ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
 
-  SDValue False = N->getOperand(0 + Offset);
-  SDValue True = N->getOperand(1 + Offset);
-  SDValue Mask = N->getOperand(2 + Offset);
-  SDValue VL = N->getOperand(3 + Offset);
+  SDValue False = N->getOperand(1);
+  SDValue True = N->getOperand(2);
+  SDValue Mask = N->getOperand(3);
+  SDValue VL = N->getOperand(4);
 
   assert(True.getResNo() == 0 &&
          "Expect True is the first output of an instruction.");
@@ -3429,34 +3436,23 @@
     if (N->use_empty() || !N->isMachineOpcode())
       continue;
 
-    auto IsVMergeTU = [](unsigned Opcode) {
-      return Opcode == RISCV::PseudoVMERGE_VVM_MF8_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_MF4_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_MF2_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M1_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M2_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M4_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M8_TU;
-    };
-
-    auto IsVMergeTA = [](unsigned Opcode) {
-      return Opcode == RISCV::PseudoVMERGE_VVM_MF8 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_MF4 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_MF2 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M1 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M2 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M4 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M8;
-    };
-
-    unsigned Opc = N->getMachineOpcode();
-    // The following optimizations require that the merge operand of N is same
-    // as the false operand of N.
-    if ((IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) ||
-        IsVMergeTA(Opc))
-      MadeChange |= performCombineVMergeAndVOps(N, IsVMergeTA(Opc));
-    if (IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1))
-      MadeChange |= performVMergeToVAdd(N);
+    switch (N->getMachineOpcode()) {
+    case RISCV::PseudoVMERGE_VVM_MF8_TU:
+    case RISCV::PseudoVMERGE_VVM_MF4_TU:
+    case RISCV::PseudoVMERGE_VVM_MF2_TU:
+    case RISCV::PseudoVMERGE_VVM_M1_TU:
+    case RISCV::PseudoVMERGE_VVM_M2_TU:
+    case RISCV::PseudoVMERGE_VVM_M4_TU:
+    case RISCV::PseudoVMERGE_VVM_M8_TU:
+      // The following optimizations require that the merge operand of N is the
+      // same as the false operand of N or that the merge operand is implicit_def.
+      if (N->getOperand(0) == N->getOperand(1) ||
+          isImplicitDef(N->getOperand(0)))
+        MadeChange |= performCombineVMergeAndVOps(N);
+      if (N->getOperand(0) == N->getOperand(1))
+        MadeChange |= performVMergeToVAdd(N);
+      break;
+    }
   }
   return MadeChange;
 }
Index: llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
===================================================================
--- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -6655,21 +6655,21 @@
                                 /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
 
 foreach fvti = AllFloatVectors in {
-  defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
+  defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU");
   let Predicates = GetVTypePredicates<fvti>.Predicates in
   def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector undef),
                                              (fvti.Vector fvti.RegClass:$rs2),
                                              (fvti.Scalar (fpimm0)),
                                              (fvti.Mask V0), VLOpFrag)),
-            (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
-  defvar instr_tu = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU");
+            (instr (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, 0, (fvti.Mask V0),
+             GPR:$vl, fvti.Log2SEW)>;
   let Predicates = GetVTypePredicates<fvti>.Predicates in
   def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$merge),
                                              (fvti.Vector fvti.RegClass:$rs2),
                                              (fvti.Scalar (fpimm0)),
                                              (fvti.Mask V0), VLOpFrag)),
-            (instr_tu fvti.RegClass:$merge, fvti.RegClass:$rs2, 0,
-                      (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
+            (instr fvti.RegClass:$merge, fvti.RegClass:$rs2, 0,
+             (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
 }
 
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
===================================================================
--- llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -932,18 +932,21 @@
   let Predicates = GetVTypePredicates<vti>.Predicates in {
     def : Pat<(vti.Vector (vselect (vti.Mask V0), vti.RegClass:$rs1,
                                                   vti.RegClass:$rs2)),
-              (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
+              (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX#"_TU")
+                   (vti.Vector (IMPLICIT_DEF)),
                    vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0),
                    vti.AVL, vti.Log2SEW)>;
 
     def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat XLenVT:$rs1),
                                                   vti.RegClass:$rs2)),
-              (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
+              (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX#"_TU")
+                   (vti.Vector (IMPLICIT_DEF)),
                    vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>;
 
     def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat_simm5 simm5:$rs1),
                                                   vti.RegClass:$rs2)),
-              (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
+              (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX#"_TU")
+                   (vti.Vector (IMPLICIT_DEF)),
                    vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>;
   }
 }
@@ -1145,14 +1148,16 @@
   let Predicates = GetVTypePredicates<fvti>.Predicates in {
     def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1,
                                                     fvti.RegClass:$rs2)),
-              (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+              (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX#"_TU")
+                   (fvti.Vector (IMPLICIT_DEF)),
                    fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
                    fvti.AVL, fvti.Log2SEW)>;
 
     def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
                                     (SplatFPOp fvti.ScalarRegClass:$rs1),
                                     fvti.RegClass:$rs2)),
-              (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+              (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX#"_TU")
+                   (fvti.Vector (IMPLICIT_DEF)),
                    fvti.RegClass:$rs2,
                    (fvti.Scalar fvti.ScalarRegClass:$rs1),
                    (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
@@ -1160,7 +1165,8 @@
     def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
                                     (SplatFPOp (fvti.Scalar fpimm0)),
                                     fvti.RegClass:$rs2)),
-              (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+              (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU")
+                   (fvti.Vector (IMPLICIT_DEF)),
                    fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
   }
 }
Index: llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
===================================================================
--- llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1857,7 +1857,8 @@
                                             vti.RegClass:$rs1,
                                             vti.RegClass:$rs2,
                                             VLOpFrag)),
-            (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
+            (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX#"_TU")
+                 (vti.Vector (IMPLICIT_DEF)),
                  vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0),
                  GPR:$vl, vti.Log2SEW)>;
 
@@ -1865,14 +1866,16 @@
                                             (SplatPat XLenVT:$rs1),
                                             vti.RegClass:$rs2,
                                             VLOpFrag)),
-            (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
+            (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX#"_TU")
+                 (vti.Vector (IMPLICIT_DEF)),
                  vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0),
                  GPR:$vl, vti.Log2SEW)>;
     def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
                                             (SplatPat_simm5 simm5:$rs1),
                                             vti.RegClass:$rs2,
                                             VLOpFrag)),
-            (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
+            (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX#"_TU")
+                 (vti.Vector (IMPLICIT_DEF)),
                  vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0),
                  GPR:$vl, vti.Log2SEW)>;
     def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
@@ -2065,7 +2068,8 @@
                                             fvti.RegClass:$rs1,
                                             fvti.RegClass:$rs2,
                                             VLOpFrag)),
-            (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+            (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX#"_TU")
+                 (fvti.Vector (IMPLICIT_DEF)),
                  fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
                  GPR:$vl, fvti.Log2SEW)>;
 
@@ -2073,7 +2077,8 @@
                                             (SplatFPOp fvti.ScalarRegClass:$rs1),
                                             fvti.RegClass:$rs2,
                                             VLOpFrag)),
-            (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+            (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX#"_TU")
+                 (fvti.Vector (IMPLICIT_DEF)),
                  fvti.RegClass:$rs2,
                  (fvti.Scalar fvti.ScalarRegClass:$rs1),
                  (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
@@ -2082,7 +2087,8 @@
                                             (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
                                             fvti.RegClass:$rs2,
                                             VLOpFrag)),
-            (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
+            (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX#"_TU")
+                 (fvti.Vector (IMPLICIT_DEF)),
                  fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask V0),
                  GPR:$vl, fvti.Log2SEW)>;
 
@@ -2091,7 +2097,8 @@
                                             (SplatFPOp (fvti.Scalar fpimm0)),
                                             fvti.RegClass:$rs2,
                                             VLOpFrag)),
-            (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+            (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU")
+                 (fvti.Vector (IMPLICIT_DEF)),
                  fvti.RegClass:$rs2, 0, (fvti.Mask V0),
                  GPR:$vl, fvti.Log2SEW)>;
     def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
Index: llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
=================================================================== --- llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -154,9 +154,9 @@ ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vmerge.vim v24, v16, 1, v0 ; RV32-NEXT: vs8r.v v24, (a3) -; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vmerge.vim v8, v16, 1, v0 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: vs8r.v v8, (a2) ; RV32-NEXT: lbu a0, 0(a1) ; RV32-NEXT: addi sp, s0, -80 @@ -194,9 +194,9 @@ ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vmerge.vim v24, v16, 1, v0 ; RV64-NEXT: vs8r.v v24, (a3) -; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: vmv1r.v v0, v8 ; RV64-NEXT: vmerge.vim v8, v16, 1, v0 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: vs8r.v v8, (a2) ; RV64-NEXT: lbu a0, 0(a1) ; RV64-NEXT: addi sp, s0, -80 Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -20,14 +20,14 @@ ; CHECK-LABEL: insertelt_idx_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <1 x i1> %x, i1 %elt, i32 %idx @@ -38,11 +38,11 @@ ; CHECK-LABEL: insertelt_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vslideup.vi v9, v8, 1 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <2 x i1> %x, i1 %elt, i64 1 @@ -53,14 +53,14 @@ ; CHECK-LABEL: insertelt_idx_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <2 x i1> %x, i1 %elt, i32 %idx @@ -71,13 +71,13 @@ ; CHECK-LABEL: insertelt_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; 
CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <8 x i1> %x, i1 %elt, i64 1 @@ -88,14 +88,14 @@ ; CHECK-LABEL: insertelt_idx_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement <8 x i1> %x, i1 %elt, i32 %idx Index: llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll +++ llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll @@ -438,14 +438,14 @@ ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a1, a0, 3 ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a1 -; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v9, v8, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma Index: llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll +++ llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll @@ -6,13 +6,13 @@ ; CHECK-LABEL: insertelt_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 3, e8, mf8, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 2 @@ -23,14 +23,14 @@ ; CHECK-LABEL: insertelt_idx_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 %idx @@ -41,13 +41,13 @@ ; CHECK-LABEL: insertelt_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; 
CHECK-NEXT: vsetivli zero, 3, e8, mf4, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 2 @@ -58,14 +58,14 @@ ; CHECK-LABEL: insertelt_idx_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 %idx @@ -76,13 +76,13 @@ ; CHECK-LABEL: insertelt_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 2 @@ -93,14 +93,14 @@ ; CHECK-LABEL: insertelt_idx_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 %idx @@ -111,13 +111,13 @@ ; CHECK-LABEL: insertelt_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetivli zero, 3, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v8, 2 +; CHECK-NEXT: vslideup.vi v8, v9, 2 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 2 @@ -128,14 +128,14 @@ ; CHECK-LABEL: insertelt_idx_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v9, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; 
CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 %idx Index: llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll +++ llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -497,18 +497,18 @@ define @reverse_nxv64i1( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i1: ; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v8 -; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 -; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8 -; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v20, v8 +; RV32-BITS-UNKNOWN-NEXT: vid.v v16 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v8, v16 +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v12, v16 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 @@ -531,18 +531,17 @@ ; ; RV32-BITS-512-LABEL: reverse_nxv64i1: ; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV32-BITS-512-NEXT: vmv.v.i v8, 0 +; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 ; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vid.v v8 -; RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0 -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-BITS-512-NEXT: vmv.v.i v16, 0 -; RV32-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0 -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vrgather.vv v28, v16, v8 -; RV32-BITS-512-NEXT: vrgather.vv v24, v20, v8 +; RV32-BITS-512-NEXT: vid.v v16 +; RV32-BITS-512-NEXT: vrsub.vx v16, v16, a0 +; RV32-BITS-512-NEXT: vrgather.vv v28, v8, v16 +; RV32-BITS-512-NEXT: vrgather.vv v24, v12, v16 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-512-NEXT: vand.vi v8, v24, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 @@ -550,18 +549,18 @@ ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i1: ; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 +; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v8 -; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 -; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8 -; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv 
v24, v20, v8 +; RV64-BITS-UNKNOWN-NEXT: vid.v v16 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v8, v16 +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v12, v16 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 @@ -584,18 +583,17 @@ ; ; RV64-BITS-512-LABEL: reverse_nxv64i1: ; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV64-BITS-512-NEXT: vmv.v.i v8, 0 +; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vid.v v8 -; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0 -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-512-NEXT: vmv.v.i v16, 0 -; RV64-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0 -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vrgather.vv v28, v16, v8 -; RV64-BITS-512-NEXT: vrgather.vv v24, v20, v8 +; RV64-BITS-512-NEXT: vid.v v16 +; RV64-BITS-512-NEXT: vrsub.vx v16, v16, a0 +; RV64-BITS-512-NEXT: vrgather.vv v28, v8, v16 +; RV64-BITS-512-NEXT: vrgather.vv v24, v12, v16 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-512-NEXT: vand.vi v8, v24, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 Index: llvm/test/CodeGen/RISCV/rvv/vector-splice.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vector-splice.ll +++ llvm/test/CodeGen/RISCV/rvv/vector-splice.ll @@ -9,17 +9,19 @@ define @splice_nxv1i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv1i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslidedown.vx v9, v9, a0 +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv1i1( %a, %b, i32 -1) @@ -29,19 +31,21 @@ define @splice_nxv1i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv1i1_offset_max: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 1 +; CHECK-NEXT: vslidedown.vi v9, v9, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslideup.vx 
v9, v8, a0 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv1i1( %a, %b, i32 1) @@ -53,17 +57,19 @@ define @splice_nxv2i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslidedown.vx v9, v9, a0 +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv2i1( %a, %b, i32 -1) @@ -73,19 +79,21 @@ define @splice_nxv2i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2i1_offset_max: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -3 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 3 +; CHECK-NEXT: vslidedown.vi v9, v9, 3 ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslideup.vx v9, v8, a0 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv2i1( %a, %b, i32 3) @@ -97,17 +105,19 @@ define @splice_nxv4i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv4i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslidedown.vx v9, v9, a0 +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv4i1( %a, %b, i32 -1) @@ -117,19 +127,21 @@ define @splice_nxv4i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv4i1_offset_max: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, 
v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -7 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 7 +; CHECK-NEXT: vslidedown.vi v9, v9, 7 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslideup.vx v9, v8, a0 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv4i1( %a, %b, i32 7) @@ -141,16 +153,18 @@ define @splice_nxv8i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv8i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v10, v10, a0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vi v10, v8, 1 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslidedown.vx v9, v9, a0 +; CHECK-NEXT: vslideup.vi v9, v8, 1 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv8i1( %a, %b, i32 -1) @@ -160,18 +174,20 @@ define @splice_nxv8i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv8i1_offset_max: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -15 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 15 +; CHECK-NEXT: vslidedown.vi v9, v9, 15 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 -; CHECK-NEXT: vslideup.vx v10, v8, a0 -; CHECK-NEXT: vand.vi v8, v10, 1 +; CHECK-NEXT: vslideup.vx v9, v8, a0 +; CHECK-NEXT: vand.vi v8, v9, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv8i1( %a, %b, i32 15) @@ -183,19 +199,21 @@ define @splice_nxv16i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv16i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v12, v12, a0 +; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vslideup.vi v12, v8, 1 -; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv16i1( %a, %b, i32 -1) @@ -205,19 +223,21 
@@ define @splice_nxv16i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv16i1_offset_max: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -31 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v12, 31 +; CHECK-NEXT: vslidedown.vi v8, v8, 31 ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vslideup.vx v12, v8, a0 -; CHECK-NEXT: vand.vi v8, v12, 1 +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv16i1( %a, %b, i32 31) @@ -229,19 +249,21 @@ define @splice_nxv32i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv32i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v16, v12, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v16, v16, a0 +; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 -; CHECK-NEXT: vslideup.vi v16, v8, 1 -; CHECK-NEXT: vand.vi v8, v16, 1 +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv32i1( %a, %b, i32 -1) @@ -276,19 +298,21 @@ define @splice_nxv64i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv64i1_offset_negone: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v8, v24, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v24, a0 +; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: vslideup.vi v24, v8, 1 -; CHECK-NEXT: vand.vi v8, v24, 1 +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv64i1( %a, %b, i32 -1)