diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -512,6 +512,7 @@
     SDValue visitFP16_TO_FP(SDNode *N);
     SDValue visitVECREDUCE(SDNode *N);
     SDValue visitVPOp(SDNode *N);
+    SDValue visitVP_MUL(SDNode *N);
 
     SDValue visitFADDForFMACombine(SDNode *N);
     SDValue visitFSUBForFMACombine(SDNode *N);
@@ -22843,6 +22844,51 @@
   return SDValue();
 }
 
+SDValue DAGCombiner::visitVP_MUL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue Mask = N->getOperand(2);
+  SDValue EVL = N->getOperand(3);
+  EVT VT = N0.getValueType();
+
+  ConstantSDNode *ConstValue0 = isConstOrConstSplat(N0);
+  ConstantSDNode *ConstValue1 = isConstOrConstSplat(N1);
+
+  // Canonicalize a constant splat to the RHS.
+  if (ConstValue0 && !ConstValue1)
+    return DAG.getNode(ISD::VP_MUL, SDLoc(N), VT, N1, N0, Mask, EVL);
+
+  // fold (vp_mul X, (splat 0), Mask, EVL) -> (splat 0)
+  if (ConstValue1 && ConstValue1->isZero()) {
+    return DAG.getSplatVector(
+        VT, SDLoc(N),
+        DAG.getConstant(0, SDLoc(N), ConstValue1->getValueType(0)));
+  }
+
+  // fold (vp_mul X, (splat 1), Mask, EVL) -> X
+  if (ConstValue1 && ConstValue1->isOne())
+    return N0;
+
+  // fold (vp_mul X, (splat -1), Mask, EVL) -> (vp_sub (splat 0), X, Mask, EVL)
+  if (ConstValue1 && ConstValue1->isAllOnes()) {
+    SDValue ZeroSplat = DAG.getSplatVector(
+        VT, SDLoc(N),
+        DAG.getConstant(0, SDLoc(N), ConstValue1->getValueType(0)));
+    return DAG.getNode(ISD::VP_SUB, SDLoc(N), VT, ZeroSplat, N0, Mask, EVL);
+  }
+
+  // fold (vp_mul X, (splat (1 << C)), Mask, EVL) -> (vp_shl X, (splat C), Mask, EVL)
+  if (ConstValue1 && ConstValue1->getAPIntValue().isPowerOf2()) {
+    SDValue Splat = DAG.getSplatVector(
+        VT, SDLoc(N),
+        DAG.getConstant(ConstValue1->getAPIntValue().countTrailingZeros(),
+                        SDLoc(N), ConstValue1->getValueType(0)));
+    return DAG.getNode(ISD::VP_SHL, SDLoc(N), VT, N0, Splat, Mask, EVL);
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitVPOp(SDNode *N) {
   // VP operations in which all vector elements are disabled - either by
   // determining that the mask is all false or that the EVL is 0 - can be
@@ -22855,8 +22901,16 @@
         ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
 
   // This is the only generic VP combine we support for now.
-  if (!AreAllEltsDisabled)
+  if (!AreAllEltsDisabled) {
+    switch (N->getOpcode()) {
+    case ISD::VP_MUL:
+      return visitVP_MUL(N);
+    // TODO: Support more VP opcodes.
+    default:
+      break;
+    }
     return SDValue();
+  }
 
   // Binary operations can be replaced by UNDEF.
   if (ISD::isVPBinaryOp(N->getOpcode()))
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
@@ -989,3 +989,103 @@
   %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
   ret <16 x i64> %v
 }
+
+define <16 x i64> @vmul_vx_1_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_1_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vmul_vx_1_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_1_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vrsub.vi v8, v8, 0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vmul_vx_2_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_2_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 64, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vmul_vx_2_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_2_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vsll.vi v8, v8, 6
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 64, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vmul_vx_3_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_3_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vmul_vx_3_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_3_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 1, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vmul_vx_4_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_4_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 0, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vmul_vx_4_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_4_v16i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 0, i32 0
+  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %head = insertelement <16 x i1> poison, i1 true, i32 0
+  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  ret <16 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
@@ -1291,3 +1291,104 @@
   %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i64> %v
 }
+
+define <vscale x 8 x i64> @vmul_vx_1_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_1_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vrsub.vi v8, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_1_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_1_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vrsub.vi v8, v8, 0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_2_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_2_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vsll.vi v8, v8, 6, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 64, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_2_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_2_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vsll.vi v8, v8, 6
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 64, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_3_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_3_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_3_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_3_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+
+define <vscale x 8 x i64> @vmul_vx_4_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_4_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 0, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vmul_vx_4_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_4_nxv8i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vmv.v.i v8, 0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 0, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
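
For reference, a minimal standalone sketch of the power-of-two fold (not part of the patch; the function name and vector width below are illustrative). Run through llc with the same configuration the tests use, e.g. llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs; with this combine the vp.mul by a splat of 64 should lower to a single vsll.vi with shift amount 6 instead of materializing the constant and emitting vmul.vx:

; Sketch of the (vp_mul X, (splat (1 << C))) -> (vp_shl X, (splat C)) fold.
declare <8 x i64> @llvm.vp.mul.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)

define <8 x i64> @vp_mul_pow2(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
  ; Splat the power-of-two constant 64 across all lanes.
  %head = insertelement <8 x i64> poison, i64 64, i32 0
  %splat = shufflevector <8 x i64> %head, <8 x i64> poison, <8 x i32> zeroinitializer
  ; visitVP_MUL should rewrite this call to (vp_shl %va, (splat 6), %m, %evl).
  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %splat, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}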