[RISCV] Fold VMERGE_VVM_<LMUL>_TU with an all-ones mask into VADD_VI_<LMUL>_TU

Summary of the changes in this patch:
  * The all-ones-mask check of doPeepholeMaskedRVV is factored out into the
    file-local helper usesAllOnesMask(N, MaskOpIdx).
  * The loop body of doPeepholeMergeVVMFold is split out into
    performCombineVMergeAndVOps(N), which reports whether it rewrote N.
  * performVMergeToVAdd(N) is added: when the mask operand (index 3) of a
    tail-undisturbed vmerge.vvm is known to be all ones, the node is replaced
    by a vadd.vi with immediate 0 using the false, true, VL and SEW operands.
  * doPeepholeMergeVVMFold now only visits VMERGE_VVM_<LMUL>_TU nodes whose
    merge operand equals their false operand and runs both transforms on them.
  * A new llc test covers every LMUL from mf8 to m8.

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -133,6 +133,8 @@
   bool doPeepholeSExtW(SDNode *Node);
   bool doPeepholeMaskedRVV(SDNode *Node);
   bool doPeepholeMergeVVMFold();
+  bool performVMergeToVAdd(SDNode *N);
+  bool performCombineVMergeAndVOps(SDNode *N);
 };
 
 namespace RISCV {
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2505,18 +2505,8 @@
   return false;
 }
 
-// Optimize masked RVV pseudo instructions with a known all-ones mask to their
-// corresponding "unmasked" pseudo versions. The mask we're interested in will
-// take the form of a V0 physical register operand, with a glued
-// register-setting instruction.
-bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
-  const RISCV::RISCVMaskedPseudoInfo *I =
-      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
-  if (!I)
-    return false;
-
-  unsigned MaskOpIdx = I->MaskOpIdx;
-
+// Return true if we can make sure mask of N is all-ones mask.
+static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
   // Check that we're using V0 as a mask register.
   if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
       cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
@@ -2546,7 +2536,23 @@
   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
   // undefined behaviour if it's the wrong bitwidth, so we could choose to
   // assume that it's all-ones? Same applies to its VL.
-  if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode()))
+  return MaskSetter->isMachineOpcode() &&
+         IsVMSet(MaskSetter.getMachineOpcode());
+}
+
+// Optimize masked RVV pseudo instructions with a known all-ones mask to their
+// corresponding "unmasked" pseudo versions. The mask we're interested in will
+// take the form of a V0 physical register operand, with a glued
+// register-setting instruction.
+bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
+  const RISCV::RISCVMaskedPseudoInfo *I =
+      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
+  if (!I)
+    return false;
+
+  unsigned MaskOpIdx = I->MaskOpIdx;
+
+  if (!usesAllOnesMask(N, MaskOpIdx))
     return false;
 
   // Retrieve the tail policy operand index, if any.
@@ -2600,6 +2606,7 @@
   }
 
   // Transitively apply any node glued to our new node.
+  const auto *Glued = N->getGluedNode();
   if (auto *TGlued = Glued->getGluedNode())
     Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
 
@@ -2614,121 +2621,167 @@
 // peephole only deals with VMERGE_VVM which is TU and has false operand same as
 // its true operand now. E.g. (VMERGE_VVM_M1_TU False, False, (VADD_M1 ...),
 // ...) -> (VADD_VV_M1_MASK)
-bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
-  bool MadeChange = false;
-  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
+  SDValue Merge = N->getOperand(0);
+  SDValue True = N->getOperand(2);
+  SDValue Mask = N->getOperand(3);
+  SDValue VL = N->getOperand(4);
 
-  while (Position != CurDAG->allnodes_begin()) {
-    SDNode *N = &*--Position;
-    if (N->use_empty() || !N->isMachineOpcode())
-      continue;
+  assert(True.getResNo() == 0 &&
+         "Expect True is the first output of an instruction.");
 
-    auto IsVMergeTU = [](unsigned Opcode) {
-      return Opcode == RISCV::PseudoVMERGE_VVM_MF8_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_MF4_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_MF2_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M1_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M2_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M4_TU ||
-             Opcode == RISCV::PseudoVMERGE_VVM_M8_TU;
-    };
+  // Need N is the exactly one using True.
+  if (!True.hasOneUse())
+    return false;
 
-    unsigned Opc = N->getMachineOpcode();
-    // TODO: Also deal with TA VMerge nodes.
-    if (!IsVMergeTU(Opc))
-      continue;
+  if (!True.isMachineOpcode())
+    return false;
 
-    SDValue Merge = N->getOperand(0);
-    SDValue False = N->getOperand(1);
-    SDValue True = N->getOperand(2);
-    SDValue Mask = N->getOperand(3);
-    SDValue VL = N->getOperand(4);
+  unsigned TrueOpc = True.getMachineOpcode();
 
-    if (Merge != False)
-      continue;
+  // Skip if True has merge operand.
+  // TODO: Deal with True having same merge operand with N.
+  if (RISCVII::hasMergeOp(TII->get(TrueOpc).TSFlags))
+    return false;
 
-    assert(True.getResNo() == 0 &&
-           "Expect True is the first output of an instruction.");
+  // Skip if True has side effect.
+  // TODO: Support velff and vlsegff.
+  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
+    return false;
 
-    // Need N is the exactly one using True.
-    if (!True.hasOneUse())
-      continue;
+  // Only deal with True when True is unmasked intrinsic now.
+  const RISCV::RISCVMaskedPseudoInfo *Info =
+      RISCV::lookupMaskedIntrinsicByUnmaskedTA(TrueOpc);
 
-    if (!True.isMachineOpcode())
-      continue;
+  if (!Info)
+    return false;
 
-    unsigned TrueOpc = True.getMachineOpcode();
+  // The last operand of unmasked intrinsic should be sew or chain.
+  bool HasChainOp =
+      True.getOperand(True.getNumOperands() - 1).getValueType() == MVT::Other;
 
-    // Skip if True has merge operand.
-    // TODO: Deal with True having same merge operand with N.
-    if (RISCVII::hasMergeOp(TII->get(TrueOpc).TSFlags))
-      continue;
+  // Need True has same VL with N.
+  unsigned TrueVLIndex = True.getNumOperands() - HasChainOp - 2;
+  SDValue TrueVL = True.getOperand(TrueVLIndex);
 
-    // Skip if True has side effect.
-    // TODO: Support velff and vlsegff.
-    if (TII->get(TrueOpc).hasUnmodeledSideEffects())
-      continue;
+  auto IsNoFPExcept = [this](SDValue N) {
+    return !this->mayRaiseFPException(N.getNode()) ||
+           N->getFlags().hasNoFPExcept();
+  };
 
-    // Only deal with True when True is unmasked intrinsic now.
-    const RISCV::RISCVMaskedPseudoInfo *Info =
-        RISCV::lookupMaskedIntrinsicByUnmaskedTA(TrueOpc);
+  // Allow the peephole for non-exception True with VLMAX vector length, since
+  // all the values after VL of N are dependent on Merge. VLMAX should be
+  // lowered to (XLenVT -1).
+  if (TrueVL != VL && !(IsNoFPExcept(True) && isAllOnesConstant(TrueVL)))
+    return false;
 
-    if (!Info)
-      continue;
+  SDLoc DL(N);
+  unsigned MaskedOpc = Info->MaskedPseudo;
+  assert(RISCVII::hasVecPolicyOp(TII->get(MaskedOpc).TSFlags) &&
+         "Expected instructions with mask have policy operand.");
 
-    // The last operand of unmasked intrinsic should be sew or chain.
-    bool HasChainOp =
-        True.getOperand(True.getNumOperands() - 1).getValueType() == MVT::Other;
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Merge);
+  Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex);
+  Ops.append({Mask, VL, /* SEW */ True.getOperand(TrueVLIndex + 1)});
+  Ops.push_back(
+      CurDAG->getTargetConstant(/* TUMU */ 0, DL, Subtarget->getXLenVT()));
 
-    // Need True has same VL with N.
-    unsigned TrueVLIndex = True.getNumOperands() - HasChainOp - 2;
-    SDValue TrueVL = True.getOperand(TrueVLIndex);
+  // Result node should have chain operand of True.
+  if (HasChainOp)
+    Ops.push_back(True.getOperand(True.getNumOperands() - 1));
 
-    auto IsNoFPExcept = [this](SDValue N) {
-      return !this->mayRaiseFPException(N.getNode()) ||
-             N->getFlags().hasNoFPExcept();
-    };
+  // Result node should take over glued node of N.
+  if (N->getGluedNode())
+    Ops.push_back(N->getOperand(N->getNumOperands() - 1));
 
-    // Allow the peephole for non-exception True with VLMAX vector length, since
-    // all the values after VL of N are dependent on Merge. VLMAX should be
-    // lowered to (XLenVT -1).
-    if (TrueVL != VL && !(IsNoFPExcept(True) && isAllOnesConstant(TrueVL)))
-      continue;
+  SDNode *Result =
+      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
+  Result->setFlags(True->getFlags());
 
-    SDLoc DL(N);
-    unsigned MaskedOpc = Info->MaskedPseudo;
-    assert(RISCVII::hasVecPolicyOp(TII->get(MaskedOpc).TSFlags) &&
-           "Expected instructions with mask have policy operand.");
+  // Replace vmerge.vvm node by Result.
+  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
 
-    SmallVector<SDValue, 8> Ops;
-    Ops.push_back(Merge);
-    Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex);
-    Ops.append({Mask, VL, /* SEW */ True.getOperand(TrueVLIndex + 1)});
-    Ops.push_back(
-        CurDAG->getTargetConstant(/* TUMU */ 0, DL, Subtarget->getXLenVT()));
+  // Replace another value of True. E.g. chain and VL.
+  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
+    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
 
-    // Result node should have chain operand of True.
-    if (HasChainOp)
-      Ops.push_back(True.getOperand(True.getNumOperands() - 1));
+  // Try to transform Result to unmasked intrinsic.
+  doPeepholeMaskedRVV(Result);
+  return true;
+}
 
-    // Result node should take over glued node of N.
-    if (N->getGluedNode())
-      Ops.push_back(N->getOperand(N->getNumOperands() - 1));
+// Transform (VMERGE_VVM_<LMUL>_TU false, false, true, allones, vl, sew) to
+// (VADD_VI_<LMUL>_TU false, true, 0, vl, sew). It may decrease uses of VMSET.
+bool RISCVDAGToDAGISel::performVMergeToVAdd(SDNode *N) {
+  unsigned NewOpc;
+  switch (N->getMachineOpcode()) {
+  default:
+    llvm_unreachable("Expected VMERGE_VVM_<LMUL>_TU instruction.");
+  case RISCV::PseudoVMERGE_VVM_MF8_TU:
+    NewOpc = RISCV::PseudoVADD_VI_MF8_TU;
+    break;
+  case RISCV::PseudoVMERGE_VVM_MF4_TU:
+    NewOpc = RISCV::PseudoVADD_VI_MF4_TU;
+    break;
+  case RISCV::PseudoVMERGE_VVM_MF2_TU:
+    NewOpc = RISCV::PseudoVADD_VI_MF2_TU;
+    break;
+  case RISCV::PseudoVMERGE_VVM_M1_TU:
+    NewOpc = RISCV::PseudoVADD_VI_M1_TU;
+    break;
+  case RISCV::PseudoVMERGE_VVM_M2_TU:
+    NewOpc = RISCV::PseudoVADD_VI_M2_TU;
+    break;
+  case RISCV::PseudoVMERGE_VVM_M4_TU:
+    NewOpc = RISCV::PseudoVADD_VI_M4_TU;
+    break;
+  case RISCV::PseudoVMERGE_VVM_M8_TU:
+    NewOpc = RISCV::PseudoVADD_VI_M8_TU;
+    break;
+  }
 
-    SDNode *Result =
-        CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
-    Result->setFlags(True->getFlags());
+  if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
+    return false;
 
-    // Replace vmerge.vvm node by Result.
-    ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue Ops[] = {N->getOperand(1), N->getOperand(2),
+                   CurDAG->getTargetConstant(0, DL, Subtarget->getXLenVT()),
+                   N->getOperand(4), N->getOperand(5)};
+  SDNode *Result = CurDAG->getMachineNode(NewOpc, DL, VT, Ops);
+  ReplaceUses(N, Result);
+  return true;
+}
+
+bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
+  bool MadeChange = false;
+  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
 
-    // Replace another value of True. E.g. chain and VL.
-    for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
-      ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
+  while (Position != CurDAG->allnodes_begin()) {
+    SDNode *N = &*--Position;
+    if (N->use_empty() || !N->isMachineOpcode())
+      continue;
+
+    auto IsVMergeTU = [](unsigned Opcode) {
+      return Opcode == RISCV::PseudoVMERGE_VVM_MF8_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_MF4_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_MF2_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M1_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M2_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M4_TU ||
+             Opcode == RISCV::PseudoVMERGE_VVM_M8_TU;
+    };
+
+    unsigned Opc = N->getMachineOpcode();
+    // The following optimizations require that the merge operand of N is same
+    // as the false operand of N.
+    // TODO: Also deal with TA VMerge nodes.
+    if (!IsVMergeTU(Opc) || N->getOperand(0) != N->getOperand(1))
+      continue;
 
-    // Try to transform Result to unmasked intrinsic.
-    doPeepholeMaskedRVV(Result);
-    MadeChange = true;
+    MadeChange |= performCombineVMergeAndVOps(N);
+    MadeChange |= performVMergeToVAdd(N);
   }
   return MadeChange;
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vadd.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vadd.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vadd.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+define <vscale x 1 x i8> @vpmerge_mf8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_mf8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT:    vadd.vi v8, v9, 0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i8 0
+  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %1 = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %allones, <vscale x 1 x i8> %y, <vscale x 1 x i8> %x, i32 %vl)
+  ret <vscale x 1 x i8> %1
+}
+
+define <vscale x 2 x i8> @vpmerge_mf4(<vscale x 2 x i8> %x, <vscale x 2 x i8> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_mf4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
+; CHECK-NEXT:    vadd.vi v8, v9, 0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i8 0
+  %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %1 = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %allones, <vscale x 2 x i8> %y, <vscale x 2 x i8> %x, i32 %vl)
+  ret <vscale x 2 x i8> %1
+}
+
+define <vscale x 4 x i8> @vpmerge_mf2(<vscale x 4 x i8> %x, <vscale x 4 x i8> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_mf2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vadd.vi v8, v9, 0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i8 0
+  %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %1 = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %allones, <vscale x 4 x i8> %y, <vscale x 4 x i8> %x, i32 %vl)
+  ret <vscale x 4 x i8> %1
+}
+
+define <vscale x 8 x i8> @vpmerge_m1(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_m1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT:    vadd.vi v8, v9, 0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i8 0
+  %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %1 = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %allones, <vscale x 8 x i8> %y, <vscale x 8 x i8> %x, i32 %vl)
+  ret <vscale x 8 x i8> %1
+}
+
+define <vscale x 8 x i16> @vpmerge_m2(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_m2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vadd.vi v8, v10, 0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i16 0
+  %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %1 = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %allones, <vscale x 8 x i16> %y, <vscale x 8 x i16> %x, i32 %vl)
+  ret <vscale x 8 x i16> %1
+}
+
+define <vscale x 8 x i32> @vpmerge_m4(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_m4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vadd.vi v8, v12, 0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
+  %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %1 = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %allones, <vscale x 8 x i32> %y, <vscale x 8 x i32> %x, i32 %vl)
+  ret <vscale x 8 x i32> %1
+}
+
+define <vscale x 8 x i64> @vpmerge_m8(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_m8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT:    vadd.vi v8, v16, 0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i64 0
+  %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %1 = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %allones, <vscale x 8 x i64> %y, <vscale x 8 x i64> %x, i32 %vl)
+  ret <vscale x 8 x i64> %1
+}
+
+declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
+declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
+declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
+declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
+declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)