diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -432,7 +432,7 @@
     SDValue visitSUBE(SDNode *N);
     SDValue visitUSUBO_CARRY(SDNode *N);
     SDValue visitSSUBO_CARRY(SDNode *N);
-    SDValue visitMUL(SDNode *N);
+    template <class MatchContextClass> SDValue visitMUL(SDNode *N);
     SDValue visitMULFIX(SDNode *N);
     SDValue useDivRem(SDNode *N);
     SDValue visitSDIV(SDNode *N);
@@ -1898,7 +1898,8 @@
   case ISD::SMULFIXSAT:
   case ISD::UMULFIX:
   case ISD::UMULFIXSAT: return visitMULFIX(N);
-  case ISD::MUL:        return visitMUL(N);
+  case ISD::MUL:
+    return visitMUL<EmptyMatchContext>(N);
   case ISD::SDIV:       return visitSDIV(N);
   case ISD::UDIV:       return visitUDIV(N);
   case ISD::SREM:
@@ -4265,11 +4266,13 @@
   return SDValue();
 }
 
-SDValue DAGCombiner::visitMUL(SDNode *N) {
+template <class MatchContextClass> SDValue DAGCombiner::visitMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N0.getValueType();
   SDLoc DL(N);
+  bool IsVP = ISD::isVPOpcode(N->getOpcode());
+  MatchContextClass matcher(DAG, TLI, N);
 
   // fold (mul x, undef) -> 0
   if (N0.isUndef() || N1.isUndef())
@@ -4282,14 +4285,15 @@
 
   // canonicalize constant to RHS (vector doesn't have to splat)
   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
-    return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
+    return matcher.getNode(ISD::MUL, DL, VT, N1, N0);
 
   bool N1IsConst = false;
   bool N1IsOpaqueConst = false;
   APInt ConstValue1;
 
   // fold vector ops
-  if (VT.isVector()) {
+  // TODO: Change this to use SimplifyVBinOp when it supports VP op.
+  if (VT.isVector() && !IsVP) {
     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
       return FoldedVOp;
@@ -4313,12 +4317,13 @@
   if (N1IsConst && ConstValue1.isOne())
     return N0;
 
-  if (SDValue NewSel = foldBinOpIntoSelect(N))
-    return NewSel;
+  if (!IsVP)
+    if (SDValue NewSel = foldBinOpIntoSelect(N))
+      return NewSel;
 
   // fold (mul x, -1) -> 0-x
   if (N1IsConst && ConstValue1.isAllOnes())
-    return DAG.getNegative(N0, DL, VT);
+    return matcher.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
 
   // fold (mul x, (1 << c)) -> x << c
   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
@@ -4327,7 +4332,7 @@
     SDValue LogBase2 = BuildLogBase2(N1, DL);
     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
-    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
+    return matcher.getNode(ISD::SHL, DL, VT, N0, Trunc);
   }
 
   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
@@ -4337,26 +4342,27 @@
 
     // FIXME: If the input is something that is easily negated (e.g. a
     //        single-use add), we should put the negate there.
-    return DAG.getNode(ISD::SUB, DL, VT,
-                       DAG.getConstant(0, DL, VT),
-                       DAG.getNode(ISD::SHL, DL, VT, N0,
-                                   DAG.getConstant(Log2Val, DL, ShiftVT)));
+    return matcher.getNode(
+        ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+        matcher.getNode(ISD::SHL, DL, VT, N0,
+                        DAG.getConstant(Log2Val, DL, ShiftVT)));
   }
 
   // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
   // hi result is in use in case we hit this mid-legalization.
-  for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
-    if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
-      SDVTList LoHiVT = DAG.getVTList(VT, VT);
-      // TODO: Can we match commutable operands with getNodeIfExists?
-      if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
-        if (LoHi->hasAnyUseOfValue(1))
-          return SDValue(LoHi, 0);
-      if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
-        if (LoHi->hasAnyUseOfValue(1))
-          return SDValue(LoHi, 0);
+  if (!IsVP)
+    for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
+      if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
+        SDVTList LoHiVT = DAG.getVTList(VT, VT);
+        // TODO: Can we match commutable operands with getNodeIfExists?
+        if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
+          if (LoHi->hasAnyUseOfValue(1))
+            return SDValue(LoHi, 0);
+        if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
+          if (LoHi->hasAnyUseOfValue(1))
+            return SDValue(LoHi, 0);
+      }
     }
-  }
 
   // Try to transform:
   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
@@ -4373,7 +4379,8 @@
   // x * 0xf800 --> (x << 16) - (x << 11)
   // x * -0x8800 --> -((x << 15) + (x << 11))
   // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
-  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
+  if (!IsVP && N1IsConst &&
+      TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
     // TODO: We could handle more general decomposition of any constant by
     //       having the target set a limit on number of ops and making a
     //       callback to determine that sequence (similar to sqrt expansion).
@@ -4407,7 +4414,7 @@
   }
 
   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
-  if (N0.getOpcode() == ISD::SHL) {
+  if (matcher.match(N0, ISD::SHL)) {
     SDValue N01 = N0.getOperand(1);
     if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
       return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
@@ -4419,34 +4426,34 @@
     SDValue Sh, Y;
     // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
-    if (N0.getOpcode() == ISD::SHL &&
+    if (matcher.match(N0, ISD::SHL) &&
        isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
      Sh = N0;
      Y = N1;
-    } else if (N1.getOpcode() == ISD::SHL &&
+    } else if (matcher.match(N1, ISD::SHL) &&
               isConstantOrConstantVector(N1.getOperand(1)) && N1->hasOneUse()) {
      Sh = N1;
      Y = N0;
    }
 
    if (Sh.getNode()) {
-      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
-      return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
+      SDValue Mul = matcher.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
+      return matcher.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
    }
  }
 
  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
-  if (N0.getOpcode() == ISD::ADD &&
+  if (matcher.match(N0, ISD::ADD) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
-    return DAG.getNode(
+    return matcher.getNode(
        ISD::ADD, DL, VT,
-        DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
-        DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
+        matcher.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
+        matcher.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
 
  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
  ConstantSDNode *NC1 = isConstOrConstSplat(N1);
-  if (N0.getOpcode() == ISD::VSCALE && NC1) {
+  if (!IsVP && N0.getOpcode() == ISD::VSCALE && NC1) {
    const APInt &C0 = N0.getConstantOperandAPInt(0);
    const APInt &C1 = NC1->getAPIntValue();
    return DAG.getVScale(DL, VT, C0 * C1);
@@ -4454,7 +4461,7 @@
 
   // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
   APInt MulVal;
-  if (N0.getOpcode() == ISD::STEP_VECTOR &&
+  if (!IsVP && N0.getOpcode() == ISD::STEP_VECTOR &&
       ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
     const APInt &C0 = N0.getConstantOperandAPInt(0);
     APInt NewStep = C0 * MulVal;
@@ -4492,13 +4499,17 @@
   }
 
   // reassociate mul
-  if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
-    return RMUL;
+  // TODO: Change reassociateOps to support vp ops.
+  if (!IsVP)
+    if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
+      return RMUL;
 
   // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
-  if (SDValue SD =
-          reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
-    return SD;
+  // TODO: Change reassociateReduction to support vp ops.
+  if (!IsVP)
+    if (SDValue SD =
+            reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
+      return SD;
 
   // Simplify the operands using demanded-bits information.
   if (SimplifyDemandedBits(SDValue(N, 0)))
@@ -25702,6 +25713,10 @@
     return visitVP_FSUB(N);
   case ISD::VP_FMA:
     return visitFMA<VPMatchContext>(N);
+  case ISD::VP_MUL:
+    return visitMUL<VPMatchContext>(N);
+  default:
+    break;
   }
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
@@ -989,3 +989,291 @@
   %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
   ret <16 x i64> %v
 }
+
+define <8 x i64> @vmul_vv_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vv_undef_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vmul_vv_undef_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    ret
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> undef, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_undef_v8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vmul_vx_undef_v8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> undef, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_zero_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_zero_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vmul_vx_zero_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 0, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_zero_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; RV32-LABEL: vmul_vx_zero_v8i64_unmasked:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vmul_vx_zero_v8i64_unmasked:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 0, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_one_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_one_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negone_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negone_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_pow2_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 64, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_pow2_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 6
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 64, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negpow2_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -64
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -64, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vx_negpow2_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -64
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 -64, i32 0
+  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.shl.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vmul_vshl_vx_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 3, v0.t
+; CHECK-NEXT:    li a0, 7
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+  %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+  %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+  %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vx_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 56
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+  %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+  %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+  %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vv_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 7, v0.t
+; CHECK-NEXT:    vmul.vv v8, v8, v12, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vshl_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vv_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vmul.vv v8, v8, v12
+; CHECK-NEXT:    vsll.vi v8, v8, 7
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %elt.head = insertelement <8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %vshl = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vshl, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.add.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vmul_vadd_vx_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vadd_vx_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vadd.vi v8, v8, 3, v0.t
+; CHECK-NEXT:    li a0, 7
+; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+  %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+  %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+  %vadd = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vadd, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vmul_vadd_vx_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vadd_vx_v8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 21
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vmv.v.x v12, a1
+; CHECK-NEXT:    li a1, 7
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vmadd.vx v8, a1, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
+  %elt.head1 = insertelement <8 x i64> poison, i64 3, i32 0
+  %vb = shufflevector <8 x i64> %elt.head1, <8 x i64> poison, <8 x i32> zeroinitializer
+  %elt.head2 = insertelement <8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <8 x i64> %elt.head2, <8 x i64> poison, <8 x i32> zeroinitializer
+  %vadd = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %vadd, <8 x i64> %vc, <8 x i1> %m, i32 %evl)
+  ret <8 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
@@ -1291,3 +1291,266 @@
   %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %v
 }
+
+define <vscale x 8 x i64> @vmul_vv_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vv_undef_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> undef, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_undef_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> undef, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_zero_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_zero_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, zero, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 0, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_zero_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_zero_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, zero
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 0, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_one_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_one_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_one_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_negone_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_negone_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negone_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_pow2_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 64, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_pow2_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_pow2_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 6
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 64, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_negpow2_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -64
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -64, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_negpow2_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_negpow2_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, -64
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a1
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -64, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.shl.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vmul_vshl_vx_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 56
+; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head1 = insertelement <vscale x 8 x i64> poison, i64 3, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %elt.head2 = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <vscale x 8 x i64> %elt.head2, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %vshl = call <vscale x 8 x i64> @llvm.vp.shl.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vshl, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vshl_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vx_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 56
+; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %elt.head1 = insertelement <vscale x 8 x i64> poison, i64 3, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %elt.head2 = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <vscale x 8 x i64> %elt.head2, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %vshl = call <vscale x 8 x i64> @llvm.vp.shl.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vshl, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vshl_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vv_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmul.vv v8, v8, v16, v0.t
+; CHECK-NEXT:    vsll.vi v8, v8, 7, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %vshl = call <vscale x 8 x i64> @llvm.vp.shl.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vshl, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vshl_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vshl_vv_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmul.vv v8, v8, v16
+; CHECK-NEXT:    vsll.vi v8, v8, 7
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %vshl = call <vscale x 8 x i64> @llvm.vp.shl.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vshl, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vmul_vadd_vx_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vadd_vx_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 7
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmul.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    li a0, 21
+; CHECK-NEXT:    vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head1 = insertelement <vscale x 8 x i64> poison, i64 3, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %elt.head2 = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <vscale x 8 x i64> %elt.head2, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %vadd = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vadd, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vadd_vx_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 21
+; CHECK-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.x v16, a1
+; CHECK-NEXT:    li a1, 7
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT:    vmadd.vx v8, a1, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %elt.head1 = insertelement <vscale x 8 x i64> poison, i64 3, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head1, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %elt.head2 = insertelement <vscale x 8 x i64> poison, i64 7, i32 0
+  %vc = shufflevector <vscale x 8 x i64> %elt.head2, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %vadd = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %vadd, <vscale x 8 x i64> %vc, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}