diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -340,18 +340,25 @@
   }
 }
 
-multiclass VPatWidenBinarySDNode_VV_VX_WV_WX<SDNode op, PatFrags extop,
-                                             string instruction_name> {
+multiclass VPatWidenBinarySDNode_VV_VX<SDNode op, PatFrags extop1,
+                                       PatFrags extop2, string instruction_name> {
   foreach vti = AllWidenableIntVectors in {
-    def : Pat<(op (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
-                  (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
+    def : Pat<(op (vti.Wti.Vector (extop1 (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
+                  (vti.Wti.Vector (extop2 (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
               (!cast<Instruction>(instruction_name#"_VV_"#vti.Vti.LMul.MX)
                  vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
                  vti.Vti.AVL, vti.Vti.Log2SEW)>;
-    def : Pat<(op (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
-                  (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))),
+    def : Pat<(op (vti.Wti.Vector (extop1 (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
+                  (vti.Wti.Vector (extop2 (vti.Vti.Vector (SplatPat GPR:$rs1))))),
               (!cast<Instruction>(instruction_name#"_VX_"#vti.Vti.LMul.MX)
                  vti.Vti.RegClass:$rs2, GPR:$rs1,
                  vti.Vti.AVL, vti.Vti.Log2SEW)>;
+  }
+}
+
+multiclass VPatWidenBinarySDNode_WV_WX<SDNode op, PatFrags extop,
+                                       string instruction_name> {
+  foreach vti = AllWidenableIntVectors in {
     def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
                   (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
               (!cast<Instruction>(instruction_name#"_WV_"#vti.Vti.LMul.MX)
@@ -365,6 +372,12 @@
   }
 }
 
+multiclass VPatWidenBinarySDNode_VV_VX_WV_WX<SDNode op, PatFrags extop,
+                                             string instruction_name> {
+  defm : VPatWidenBinarySDNode_VV_VX<op, extop, extop, instruction_name>;
+  defm : VPatWidenBinarySDNode_WV_WX<op, extop, instruction_name>;
+}
+
 multiclass VPatWidenMulAddSDNode_VV<PatFrags extop1, PatFrags extop2, string instruction_name> {
   foreach vti = AllWidenableIntVectors in {
     def : Pat<
@@ -632,6 +645,20 @@
 defm : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU">;
 defm : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">;
 
+// 12.12. Vector Widening Integer Multiply Instructions
+defm : VPatWidenBinarySDNode_VV_VX<mul, sext_oneuse, sext_oneuse,
+                                   "PseudoVWMUL">;
+defm : VPatWidenBinarySDNode_VV_VX<mul, zext_oneuse, zext_oneuse,
+                                   "PseudoVWMULU">;
+defm : VPatWidenBinarySDNode_VV_VX<mul, anyext_oneuse, anyext_oneuse,
+                                   "PseudoVWMULU">;
+defm : VPatWidenBinarySDNode_VV_VX<mul, zext_oneuse, anyext_oneuse,
+                                   "PseudoVWMULU">;
+defm : VPatWidenBinarySDNode_VV_VX<mul, sext_oneuse, zext_oneuse,
+                                   "PseudoVWMULSU">;
+defm : VPatWidenBinarySDNode_VV_VX<mul, sext_oneuse, anyext_oneuse,
+                                   "PseudoVWMULSU">;
+
 // 12.13 Vector Single-Width Integer Multiply-Add Instructions.
 foreach vti = AllIntegerVectors in {
   // NOTE: We choose VMADD because it has the most commuting freedom.
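For context on the `defm` operands above (not part of the diff itself): the `sext_oneuse`/`zext_oneuse`/`anyext_oneuse` fragments are single-use-restricted extend nodes, so a widening multiply is only formed when the extend has no other users and folding it into vwmul leaves no extra work behind. They are defined elsewhere in RISCVInstrInfoVSDPatterns.td; a minimal sketch of the assumed shape, with the one-use check in the PatFrag predicate:

def sext_oneuse : PatFrag<(ops node:$A), (sext node:$A), [{
  // Only match an extend whose sole user is the multiply being widened.
  return N->hasOneUse();
}]>;
// zext_oneuse and anyext_oneuse follow the same pattern with zext/anyext.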
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll
@@ -0,0 +1,339 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x i64> @vwmul_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vwmul.vv v10, v8, v9
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i32> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i32> %vb to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulu_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vwmulu.vv v10, v8, v9
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 1 x i32> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i32> %vb to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulsu_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vwmulsu.vv v10, v8, v9
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 1 x i32> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i32> %vb to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmul_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmul_vx_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vwmul.vx v9, v8, a0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = sext <vscale x 1 x i32> %va to <vscale x 1 x i64>
+  %vd = sext <vscale x 1 x i32> %splat to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulu_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vwmulu.vx v9, v8, a0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = zext <vscale x 1 x i32> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i32> %splat to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 1 x i64> @vwmulsu_vx_nxv1i64(<vscale x 1 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vwmulsu.vx v9, v8, a0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+  %vc = sext <vscale x 1 x i32> %va to <vscale x 1 x i64>
+  %vd = zext <vscale x 1 x i32> %splat to <vscale x 1 x i64>
+  %ve = mul <vscale x 1 x i64> %vc, %vd
+  ret <vscale x 1 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmul_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vwmul.vv v10, v8, v9
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulu_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vwmulu.vv v10, v8, v9
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulsu_vv_nxv2i64(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vwmulsu.vv v10, v8, v9
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i32> %vb to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmul_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmul_vx_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vwmul.vx v10, v8, a0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+  %vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
+  %vd = sext <vscale x 2 x i32> %splat to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulu_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vwmulu.vx v10, v8, a0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+  %vc = zext <vscale x 2 x i32> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i32> %splat to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 2 x i64> @vwmulsu_vx_nxv2i64(<vscale x 2 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vwmulsu.vx v10, v8, a0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+  %vc = sext <vscale x 2 x i32> %va to <vscale x 2 x i64>
+  %vd = zext <vscale x 2 x i32> %splat to <vscale x 2 x i64>
+  %ve = mul <vscale x 2 x i64> %vc, %vd
+  ret <vscale x 2 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmul_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vwmul.vv v12, v8, v10
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulu_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vwmulu.vv v12, v8, v10
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulsu_vv_nxv4i64(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vwmulsu.vv v12, v8, v10
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i32> %vb to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmul_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmul_vx_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vwmul.vx v12, v8, a0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
+  %vd = sext <vscale x 4 x i32> %splat to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulu_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vwmulu.vx v12, v8, a0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %vc = zext <vscale x 4 x i32> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i32> %splat to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 4 x i64> @vwmulsu_vx_nxv4i64(<vscale x 4 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vwmulsu.vx v12, v8, a0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %vc = sext <vscale x 4 x i32> %va to <vscale x 4 x i64>
+  %vd = zext <vscale x 4 x i32> %splat to <vscale x 4 x i64>
+  %ve = mul <vscale x 4 x i64> %vc, %vd
+  ret <vscale x 4 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmul_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwmul_vv_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vwmul.vv v16, v8, v12
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulu_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwmulu_vv_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vwmulu.vv v16, v8, v12
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulsu_vv_nxv8i64(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
+; CHECK-LABEL: vwmulsu_vv_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vwmulsu.vv v16, v8, v12
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i32> %vb to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmul_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmul_vx_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vwmul.vx v16, v8, a0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+  %vd = sext <vscale x 8 x i32> %splat to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulu_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulu_vx_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vwmulu.vx v16, v8, a0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+  %vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i32> %splat to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwmulsu_vx_nxv8i64(<vscale x 8 x i32> %va, i32 %b) {
+; CHECK-LABEL: vwmulsu_vx_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vwmulsu.vx v16, v8, a0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+  %vd = zext <vscale x 8 x i32> %splat to <vscale x 8 x i64>
+  %ve = mul <vscale x 8 x i64> %vc, %vd
+  ret <vscale x 8 x i64> %ve
+}
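If these assertions go stale after a rebase, they can be regenerated with the script named in the NOTE line at the top of the test. A typical invocation, assuming llc was built into ./build (path is an assumption, not part of this patch):

./llvm/utils/update_llc_test_checks.py --llc-binary ./build/bin/llc llvm/test/CodeGen/RISCV/rvv/vwmul-sdnode.ll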