diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3123,15 +3123,14 @@
   return false;
 }
 
-// Return true if we can make sure mask of N is all-ones mask.
-static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
+static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
   // Check that we're using V0 as a mask register.
-  if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
-      cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
+  if (!isa<RegisterSDNode>(MaskOp) ||
+      cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
     return false;
 
   // The glued user defines V0.
-  const auto *Glued = N->getGluedNode();
+  const auto *Glued = GlueOp.getNode();
   if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
     return false;
 
@@ -3158,6 +3157,12 @@
          IsVMSet(MaskSetter.getMachineOpcode());
 }
 
+// Return true if we can make sure mask of N is all-ones mask.
+static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
+  return usesAllOnesMask(N->getOperand(MaskOpIdx),
+                         N->getOperand(N->getNumOperands() - 1));
+}
+
 static bool isImplicitDef(SDValue V) {
   return V.isMachineOpcode() &&
          V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
@@ -3213,6 +3218,45 @@
   return true;
 }
 
+static bool IsVMerge(SDNode *N) {
+  unsigned Opc = N->getMachineOpcode();
+  return Opc == RISCV::PseudoVMERGE_VVM_MF8 ||
+         Opc == RISCV::PseudoVMERGE_VVM_MF4 ||
+         Opc == RISCV::PseudoVMERGE_VVM_MF2 ||
+         Opc == RISCV::PseudoVMERGE_VVM_M1 ||
+         Opc == RISCV::PseudoVMERGE_VVM_M2 ||
+         Opc == RISCV::PseudoVMERGE_VVM_M4 || Opc == RISCV::PseudoVMERGE_VVM_M8;
+}
+
+static bool IsVMv(SDNode *N) {
+  unsigned Opc = N->getMachineOpcode();
+  return Opc == RISCV::PseudoVMV_V_V_MF8 || Opc == RISCV::PseudoVMV_V_V_MF4 ||
+         Opc == RISCV::PseudoVMV_V_V_MF2 || Opc == RISCV::PseudoVMV_V_V_M1 ||
+         Opc == RISCV::PseudoVMV_V_V_M2 || Opc == RISCV::PseudoVMV_V_V_M4 ||
+         Opc == RISCV::PseudoVMV_V_V_M8;
+}
+
+static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
+  switch (LMUL) {
+  case RISCVII::LMUL_F8:
+    return RISCV::PseudoVMSET_M_B1;
+  case RISCVII::LMUL_F4:
+    return RISCV::PseudoVMSET_M_B2;
+  case RISCVII::LMUL_F2:
+    return RISCV::PseudoVMSET_M_B4;
+  case RISCVII::LMUL_1:
+    return RISCV::PseudoVMSET_M_B8;
+  case RISCVII::LMUL_2:
+    return RISCV::PseudoVMSET_M_B16;
+  case RISCVII::LMUL_4:
+    return RISCV::PseudoVMSET_M_B32;
+  case RISCVII::LMUL_8:
+    return RISCV::PseudoVMSET_M_B64;
+  case RISCVII::LMUL_RESERVED:
+    llvm_unreachable("Unexpected LMUL");
+  }
+}
+
 // Try to fold away VMERGE_VVM instructions. We handle these cases:
 // -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction
 //  folds to a masked TU instruction. VMERGE_VVM must have have merge operand
@@ -3227,16 +3271,27 @@
 // form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
 // form.
 bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
-
-  SDValue Merge = N->getOperand(0);
-  SDValue False = N->getOperand(1);
-  SDValue True = N->getOperand(2);
-  SDValue Mask = N->getOperand(3);
-  SDValue VL = N->getOperand(4);
-  // We always have a glue node for the mask at v0
-  assert(cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
-  SDValue Glue = N->getOperand(N->getNumOperands() - 1);
-  assert(Glue.getValueType() == MVT::Glue);
+  SDValue Merge, False, True, VL, Mask, Glue;
+  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
+  if (IsVMv(N)) {
+    Merge = N->getOperand(0);
+    False = N->getOperand(0);
+    True = N->getOperand(1);
+    VL = N->getOperand(2);
+    // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
+    // mask later below.
+  } else {
+    assert(IsVMerge(N));
+    Merge = N->getOperand(0);
+    False = N->getOperand(1);
+    True = N->getOperand(2);
+    Mask = N->getOperand(3);
+    VL = N->getOperand(4);
+    // We always have a glue node for the mask at v0.
+    Glue = N->getOperand(N->getNumOperands() - 1);
+  }
+  assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
+  assert(!Glue || Glue.getValueType() == MVT::Glue);
 
   // We require that either merge and false are the same, or that merge
   // is undefined.
@@ -3291,7 +3346,7 @@
     // the mask from the True instruction.
     // FIXME: Support mask agnostic True instruction which would have an
    // undef merge operand.
-    if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
+    if (Mask && !usesAllOnesMask(Mask, Glue))
      return false;
   }
 
@@ -3315,9 +3370,11 @@
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    LoopWorklist.push_back(False.getNode());
-    LoopWorklist.push_back(Mask.getNode());
+    if (Mask)
+      LoopWorklist.push_back(Mask.getNode());
    LoopWorklist.push_back(VL.getNode());
-    LoopWorklist.push_back(Glue.getNode());
+    if (Glue)
+      LoopWorklist.push_back(Glue.getNode());
    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
      return false;
   }
@@ -3327,6 +3384,7 @@
   unsigned TrueVLIndex =
       True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
   SDValue TrueVL = True.getOperand(TrueVLIndex);
+  SDValue SEW = True.getOperand(TrueVLIndex + 1);
 
   auto GetMinVL = [](SDValue LHS, SDValue RHS) {
     if (LHS == RHS)
@@ -3356,6 +3414,8 @@
       !True->getFlags().hasNoFPExcept())
     return false;
 
+  SDLoc DL(N);
+
   // From the preconditions we checked above, we know the mask and thus glue
   // for the result node will be taken from True.
   if (IsMasked) {
@@ -3363,8 +3423,22 @@
     Glue = True->getOperand(True->getNumOperands() - 1);
     assert(Glue.getValueType() == MVT::Glue);
   }
+  // If we end up using the vmerge mask the vmerge is actually a vmv.v.v, create
+  // an all-ones mask to use.
+  else if (IsVMv(N)) {
+    unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
+    unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
+    ElementCount EC = N->getValueType(0).getVectorElementCount();
+    MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
+
+    SDValue AllOnesMask =
+        SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
+    SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
+                                            RISCV::V0, AllOnesMask, SDValue());
+    Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
+    Glue = MaskCopy.getValue(1);
+  }
 
-  SDLoc DL(N);
   unsigned MaskedOpc = Info->MaskedPseudo;
 #ifndef NDEBUG
   const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
@@ -3375,10 +3449,7 @@
            "Expected instructions with mask have a tied dest.");
 #endif
 
-  SDValue SEW = True.getOperand(TrueVLIndex + 1);
-
-  uint64_t Policy = isImplicitDef(N->getOperand(0)) ?
-    RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
+  uint64_t Policy = isImplicitDef(Merge) ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
   SDValue PolicyOp =
     CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
 
@@ -3468,20 +3539,9 @@
     if (N->use_empty() || !N->isMachineOpcode())
       continue;
 
-    auto IsVMerge = [](unsigned Opcode) {
-      return Opcode == RISCV::PseudoVMERGE_VVM_MF8 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_MF4 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_MF2 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M1 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M2 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M4 ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M8;
-    };
-
-    unsigned Opc = N->getMachineOpcode();
-    if (IsVMerge(Opc))
+    if (IsVMerge(N) || IsVMv(N))
      MadeChange |= performCombineVMergeAndVOps(N);
-    if (IsVMerge(Opc) && N->getOperand(0) == N->getOperand(1))
+    if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1))
      MadeChange |= performVMergeToVMv(N);
   }
   return MadeChange;
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-vmv.ll
@@ -80,10 +80,8 @@
 define <vscale x 4 x i32> @foldable_load(<vscale x 4 x i32> %passthru, ptr %p) {
 ; CHECK-LABEL: foldable_load:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
-; CHECK-NEXT:    vle32.v v10, (a0)
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m2, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %v = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32(<vscale x 4 x i32> poison, ptr %p, iXLen 4)
   %w = call <vscale x 4 x i32> @llvm.riscv.vmv.v.v.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, iXLen 2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -470,13 +470,12 @@
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    srli a0, a0, 2
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vx v11, v10, a0
 ; CHECK-NEXT:    vslidedown.vx v8, v9, a0
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v9, v11
 ; CHECK-NEXT:    add a1, a0, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v10, a0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    vslidedown.vx v9, v10, a0
 ; CHECK-NEXT:    ret
   %res = call <vscale x 6 x half> @llvm.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 6)
   ret <vscale x 6 x half> %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector-shuffle.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector-shuffle.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector-shuffle.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector-shuffle.ll
@@ -5,10 +5,8 @@
 define <4 x i32> @insert_subvector_load_v4i32_v4i32(<4 x i32> %v1, ptr %p) {
 ; CHECK-LABEL: insert_subvector_load_v4i32_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %v2 = load <4 x i32>, ptr %p
   %v3 = shufflevector <4 x i32> %v2, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -19,10 +17,8 @@
 define <4 x i32> @insert_subvector_vp_load_v4i32_v4i32(<4 x i32> %v1, ptr %p, <4 x i1> %mask) {
 ; CHECK-LABEL: insert_subvector_vp_load_v4i32_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0), v0.t
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %v2 = call <4 x i32> @llvm.vp.load.v4i32(ptr %p, <4 x i1> %mask, i32 4)
   %v3 = shufflevector <4 x i32> %v2, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -48,11 +44,8 @@
 define <4 x i32> @insert_subvector_load_foldable_passthru_v4i32_v4i32(<4 x i32> %v1, ptr %p, <4 x i1> %mask) {
 ; CHECK-LABEL: insert_subvector_load_foldable_passthru_v4i32_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vmv1r.v v9, v8
-; CHECK-NEXT:    vle32.v v9, (a0), v0.t
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   %v2 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4, <4 x i1> %mask, <4 x i32> %v1)
   %v3 = shufflevector <4 x i32> %v2, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -64,9 +57,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    vadd.vv v9, v9, v10
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vadd.vv v8, v9, v10
 ; CHECK-NEXT:    ret
   %v3 = add <4 x i32> %v2, <i32 0, i32 1, i32 2, i32 3>
   %v4 = shufflevector <4 x i32> %v3, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -77,10 +69,8 @@
 define <4 x i32> @insert_subvector_vp_add_v4i32_v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i1> %mask) {
 ; CHECK-LABEL: insert_subvector_vp_add_v4i32_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vadd.vi v9, v9, 1, v0.t
-; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, mu
+; CHECK-NEXT:    vadd.vi v8, v9, 1, v0.t
 ; CHECK-NEXT:    ret
   %v3 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %v2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i1> %mask, i32 4)
   %v4 = shufflevector <4 x i32> %v3, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -149,10 +139,8 @@
 define <4 x i32> @insert_subvector_load_v4i32_v8i32(<4 x i32> %v1, ptr %p) {
 ; CHECK-LABEL: insert_subvector_load_v4i32_v8i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v9, (a0)
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %v2 = load <8 x i32>, ptr %p
   %v3 = shufflevector <8 x i32> %v2, <8 x i32> poison, <4 x i32>
@@ -180,9 +168,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    vadd.vv v9, v10, v9
 ; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
-; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    vadd.vv v8, v10, v9
 ; CHECK-NEXT:    ret
   %v3 = add <8 x i32> %v2,
   %v4 = shufflevector <8 x i32> %v3, <8 x i32> poison, <4 x i32>