diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -514,6 +514,68 @@
 defm "" : VPatBinaryFPSDNode_VV_VF<fdiv, "PseudoVFDIV">;
 defm "" : VPatBinaryFPSDNode_R_VF<fdiv, "PseudoVFRDIV">;
 
+// 14.6 Vector Single-Width Floating-Point Fused Multiply-Add Instructions.
+foreach fvti = AllFloatVectors in {
+  // NOTE: We choose VFMADD because it has the most commuting freedom. So it
+  // works best with how TwoAddressInstructionPass tries commuting.
+  def : Pat<(fvti.Vector (fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
+                              fvti.RegClass:$rs2)),
+            (!cast<Instruction>("PseudoVFMADD_VV_"# fvti.LMul.MX)
+                 fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
+                 fvti.AVL, fvti.SEW)>;
+  def : Pat<(fvti.Vector (fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
+                              (fneg fvti.RegClass:$rs2))),
+            (!cast<Instruction>("PseudoVFMSUB_VV_"# fvti.LMul.MX)
+                 fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
+                 fvti.AVL, fvti.SEW)>;
+  def : Pat<(fvti.Vector (fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd,
+                              (fneg fvti.RegClass:$rs2))),
+            (!cast<Instruction>("PseudoVFNMADD_VV_"# fvti.LMul.MX)
+                 fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
+                 fvti.AVL, fvti.SEW)>;
+  def : Pat<(fvti.Vector (fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd,
+                              fvti.RegClass:$rs2)),
+            (!cast<Instruction>("PseudoVFNMSUB_VV_"# fvti.LMul.MX)
+                 fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
+                 fvti.AVL, fvti.SEW)>;
+
+  // The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally
+  // commutable.
+  def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
+                              fvti.RegClass:$rd, fvti.RegClass:$rs2)),
+            (!cast<Instruction>("PseudoVFMADD_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX)
+                 fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+                 fvti.AVL, fvti.SEW)>;
+  def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
+                              fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
+            (!cast<Instruction>("PseudoVFMSUB_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX)
+                 fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+                 fvti.AVL, fvti.SEW)>;
+
+  def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
+                              (fneg fvti.RegClass:$rd), (fneg fvti.RegClass:$rs2))),
+            (!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX)
+                 fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+                 fvti.AVL, fvti.SEW)>;
+  def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
+                              (fneg fvti.RegClass:$rd), fvti.RegClass:$rs2)),
+            (!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX)
+                 fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+                 fvti.AVL, fvti.SEW)>;
+
+  // The splat might be negated.
+  def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)),
+                              fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
+            (!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX)
+                 fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+                 fvti.AVL, fvti.SEW)>;
+  def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)),
+                              fvti.RegClass:$rd, fvti.RegClass:$rs2)),
+            (!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # fvti.LMul.MX)
+                 fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+                 fvti.AVL, fvti.SEW)>;
+}
+
 // 14.10. Vector Floating-Point Sign-Injection Instructions
 // Handle fneg with VFSGNJN using the same input for both operands.
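(Editor's note, not part of the patch.) The "commuting freedom" mentioned in the NOTE comes down to which source the tied destination register plays. Per the RVV spec the two related forms compute:

    vfmadd.vv vd, vs1, vs2   # vd[i] = (vs1[i] * vd[i]) + vs2[i]   (destination is a multiplicand)
    vfmacc.vv vd, vs1, vs2   # vd[i] = (vs1[i] * vs2[i]) + vd[i]   (destination is the addend)

Roughly, always selecting the VFMADD pseudo and letting TwoAddressInstructionPass commute it to the VFMACC form when the addend is the value already living in the destination register is what lets the tests below avoid extra vector copies across several operand orders.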
foreach vti = AllFloatVectors in { diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll @@ -0,0 +1,391 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +; This tests a mix of vfmacc and vfmadd by using different operand orders to +; trigger commuting in TwoAddressInstructionPass. + +declare @llvm.fma.v1f16(, , ) + +define @vfmadd_vv_nxv1f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vfmadd.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v1f16( %va, %vb, %vc) + ret %vd +} + +define @vfmadd_vf_nxv1f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmadd_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v1f16( %va, %splat, %vb) + ret %vd +} + +declare @llvm.fma.v2f16(, , ) + +define @vfmadd_vv_nxv2f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vfmadd.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v2f16( %va, %vc, %vb) + ret %vd +} + +define @vfmadd_vf_nxv2f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmadd_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vfmadd.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v2f16( %vb, %splat, %va) + ret %vd +} + +declare @llvm.fma.v4f16(, , ) + +define @vfmadd_vv_nxv4f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vfmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v4f16( %vb, %va, %vc) + ret %vd +} + +define @vfmadd_vf_nxv4f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmadd_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v4f16( %va, %splat, %vb) + ret %vd +} + +declare @llvm.fma.v8f16(, , ) + +define @vfmadd_vv_nxv8f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: vfmadd.vv v12, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v8f16( %vb, %vc, %va) + ret %vd +} + +define @vfmadd_vf_nxv8f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmadd_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: vfmadd.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v8f16( %vb, %splat, %va) + ret %vd +} + 
+declare @llvm.fma.v16f16(, , ) + +define @vfmadd_vv_nxv16f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vfmadd.vv v8, v16, v12 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v16f16( %vc, %va, %vb) + ret %vd +} + +define @vfmadd_vf_nxv16f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmadd_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v16f16( %va, %splat, %vb) + ret %vd +} + +declare @llvm.fma.v32f16(, , ) + +define @vfmadd_vv_nxv32f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: vfmadd.vv v16, v24, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v32f16( %vc, %vb, %va) + ret %vd +} + +define @vfmadd_vf_nxv32f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmadd_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: vfmadd.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v32f16( %vb, %splat, %va) + ret %vd +} + +declare @llvm.fma.v1f32(, , ) + +define @vfmadd_vv_nxv1f32( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vfmadd.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v1f32( %va, %vb, %vc) + ret %vd +} + +define @vfmadd_vf_nxv1f32( %va, %vb, float %c) { +; CHECK-LABEL: vfmadd_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v1f32( %va, %splat, %vb) + ret %vd +} + +declare @llvm.fma.v2f32(, , ) + +define @vfmadd_vv_nxv2f32( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vfmadd.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v2f32( %va, %vc, %vb) + ret %vd +} + +define @vfmadd_vf_nxv2f32( %va, %vb, float %c) { +; CHECK-LABEL: vfmadd_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vfmadd.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v2f32( %vb, %splat, %va) + ret %vd +} + +declare @llvm.fma.v4f32(, , ) + +define @vfmadd_vv_nxv4f32( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vfmadd.vv v8, v10, v12 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v4f32( %vb, %va, %vc) + ret %vd +} + +define @vfmadd_vf_nxv4f32( %va, %vb, float %c) { +; CHECK-LABEL: vfmadd_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v4f32( %va, %splat, 
%vb) + ret %vd +} + +declare @llvm.fma.v8f32(, , ) + +define @vfmadd_vv_nxv8f32( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: vfmadd.vv v16, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v8f32( %vb, %vc, %va) + ret %vd +} + +define @vfmadd_vf_nxv8f32( %va, %vb, float %c) { +; CHECK-LABEL: vfmadd_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: vfmadd.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v8f32( %vb, %splat, %va) + ret %vd +} + +declare @llvm.fma.v16f32(, , ) + +define @vfmadd_vv_nxv16f32( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vfmadd.vv v8, v24, v16 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v16f32( %vc, %va, %vb) + ret %vd +} + +define @vfmadd_vf_nxv16f32( %va, %vb, float %c) { +; CHECK-LABEL: vfmadd_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v16 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v16f32( %va, %splat, %vb) + ret %vd +} + +declare @llvm.fma.v1f64(, , ) + +define @vfmadd_vv_nxv1f64( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vfmadd.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v1f64( %va, %vb, %vc) + ret %vd +} + +define @vfmadd_vf_nxv1f64( %va, %vb, double %c) { +; CHECK-LABEL: vfmadd_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v1f64( %va, %splat, %vb) + ret %vd +} + +declare @llvm.fma.v2f64(, , ) + +define @vfmadd_vv_nxv2f64( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vfmadd.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v2f64( %va, %vc, %vb) + ret %vd +} + +define @vfmadd_vf_nxv2f64( %va, %vb, double %c) { +; CHECK-LABEL: vfmadd_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vfmadd.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v2f64( %vb, %splat, %va) + ret %vd +} + +declare @llvm.fma.v4f64(, , ) + +define @vfmadd_vv_nxv4f64( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vfmadd.vv v8, v12, v16 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v4f64( %vb, %va, %vc) + ret %vd +} + +define @vfmadd_vf_nxv4f64( %va, %vb, double %c) { +; CHECK-LABEL: vfmadd_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call 
@llvm.fma.v4f64( %va, %splat, %vb) + ret %vd +} + +declare @llvm.fma.v8f64(, , ) + +define @vfmadd_vv_nxv8f64( %va, %vb, %vc) { +; CHECK-LABEL: vfmadd_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; CHECK-NEXT: vfmadd.vv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %vd = call @llvm.fma.v8f64( %vb, %vc, %va) + ret %vd +} + +define @vfmadd_vf_nxv8f64( %va, %vb, double %c) { +; CHECK-LABEL: vfmadd_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; CHECK-NEXT: vfmadd.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vd = call @llvm.fma.v8f64( %vb, %splat, %va) + ret %vd +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll @@ -0,0 +1,421 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +; This tests a mix of vfmsac and vfmsub by using different operand orders to +; trigger commuting in TwoAddressInstructionPass. + +declare @llvm.fma.v1f16(, , ) + +define @vfmsub_vv_nxv1f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vfmsub.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %neg = fneg %vc + %vd = call @llvm.fma.v1f16( %va, %vb, %neg) + ret %vd +} + +define @vfmsub_vf_nxv1f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmsub_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %vb + %vd = call @llvm.fma.v1f16( %va, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v2f16(, , ) + +define @vfmsub_vv_nxv2f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vfmsub.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %neg = fneg %vb + %vd = call @llvm.fma.v2f16( %va, %vc, %neg) + ret %vd +} + +define @vfmsub_vf_nxv2f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmsub_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vfmsub.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v2f16( %vb, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v4f16(, , ) + +define @vfmsub_vv_nxv4f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vfmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %neg = fneg %vc + %vd = call @llvm.fma.v4f16( %vb, %va, %neg) + ret %vd +} + +define @vfmsub_vf_nxv4f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmsub_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vfmsub.vf v8, 
fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %vb + %vd = call @llvm.fma.v4f16( %va, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v8f16(, , ) + +define @vfmsub_vv_nxv8f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: vfmsub.vv v12, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %neg = fneg %va + %vd = call @llvm.fma.v8f16( %vb, %vc, %neg) + ret %vd +} + +define @vfmsub_vf_nxv8f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmsub_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: vfmsub.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v8f16( %vb, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v16f16(, , ) + +define @vfmsub_vv_nxv16f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vfmsub.vv v8, v16, v12 +; CHECK-NEXT: ret + %neg = fneg %vb + %vd = call @llvm.fma.v16f16( %vc, %va, %neg) + ret %vd +} + +define @vfmsub_vf_nxv16f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmsub_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %vb + %vd = call @llvm.fma.v16f16( %va, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v32f16(, , ) + +define @vfmsub_vv_nxv32f16( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: vfmsub.vv v16, v24, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %neg = fneg %va + %vd = call @llvm.fma.v32f16( %vc, %vb, %neg) + ret %vd +} + +define @vfmsub_vf_nxv32f16( %va, %vb, half %c) { +; CHECK-LABEL: vfmsub_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: vfmsub.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v32f16( %vb, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v1f32(, , ) + +define @vfmsub_vv_nxv1f32( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vfmsub.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %neg = fneg %vc + %vd = call @llvm.fma.v1f32( %va, %vb, %neg) + ret %vd +} + +define @vfmsub_vf_nxv1f32( %va, %vb, float %c) { +; CHECK-LABEL: vfmsub_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %vb + %vd = call @llvm.fma.v1f32( %va, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v2f32(, , ) + +define @vfmsub_vv_nxv2f32( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vfmsub.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %neg = fneg %vb 
+ %vd = call @llvm.fma.v2f32( %va, %vc, %neg) + ret %vd +} + +define @vfmsub_vf_nxv2f32( %va, %vb, float %c) { +; CHECK-LABEL: vfmsub_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vfmsub.vf v9, fa0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v2f32( %vb, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v4f32(, , ) + +define @vfmsub_vv_nxv4f32( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vfmsub.vv v8, v10, v12 +; CHECK-NEXT: ret + %neg = fneg %vc + %vd = call @llvm.fma.v4f32( %vb, %va, %neg) + ret %vd +} + +define @vfmsub_vf_nxv4f32( %va, %vb, float %c) { +; CHECK-LABEL: vfmsub_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %vb + %vd = call @llvm.fma.v4f32( %va, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v8f32(, , ) + +define @vfmsub_vv_nxv8f32( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: vfmsub.vv v16, v12, v8 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %neg = fneg %va + %vd = call @llvm.fma.v8f32( %vb, %vc, %neg) + ret %vd +} + +define @vfmsub_vf_nxv8f32( %va, %vb, float %c) { +; CHECK-LABEL: vfmsub_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: vfmsub.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v8f32( %vb, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v16f32(, , ) + +define @vfmsub_vv_nxv16f32( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vfmsub.vv v8, v24, v16 +; CHECK-NEXT: ret + %neg = fneg %vb + %vd = call @llvm.fma.v16f32( %vc, %va, %neg) + ret %vd +} + +define @vfmsub_vf_nxv16f32( %va, %vb, float %c) { +; CHECK-LABEL: vfmsub_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v16 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %vb + %vd = call @llvm.fma.v16f32( %va, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v1f64(, , ) + +define @vfmsub_vv_nxv1f64( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vfmsub.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %neg = fneg %vc + %vd = call @llvm.fma.v1f64( %va, %vb, %neg) + ret %vd +} + +define @vfmsub_vf_nxv1f64( %va, %vb, double %c) { +; CHECK-LABEL: vfmsub_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %vb + %vd = call @llvm.fma.v1f64( %va, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v2f64(, , ) + +define @vfmsub_vv_nxv2f64( %va, %vb, %vc) 
{ +; CHECK-LABEL: vfmsub_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vfmsub.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %neg = fneg %vb + %vd = call @llvm.fma.v2f64( %va, %vc, %neg) + ret %vd +} + +define @vfmsub_vf_nxv2f64( %va, %vb, double %c) { +; CHECK-LABEL: vfmsub_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vfmsub.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v2f64( %vb, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v4f64(, , ) + +define @vfmsub_vv_nxv4f64( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vfmsub.vv v8, v12, v16 +; CHECK-NEXT: ret + %neg = fneg %vc + %vd = call @llvm.fma.v4f64( %vb, %va, %neg) + ret %vd +} + +define @vfmsub_vf_nxv4f64( %va, %vb, double %c) { +; CHECK-LABEL: vfmsub_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %vb + %vd = call @llvm.fma.v4f64( %va, %splat, %neg) + ret %vd +} + +declare @llvm.fma.v8f64(, , ) + +define @vfmsub_vv_nxv8f64( %va, %vb, %vc) { +; CHECK-LABEL: vfmsub_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; CHECK-NEXT: vfmsub.vv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %neg = fneg %va + %vd = call @llvm.fma.v8f64( %vb, %vc, %neg) + ret %vd +} + +define @vfmsub_vf_nxv8f64( %va, %vb, double %c) { +; CHECK-LABEL: vfmsub_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; CHECK-NEXT: vfmsub.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v8f64( %vb, %splat, %neg) + ret %vd +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll @@ -0,0 +1,447 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +; This tests a mix of vfnmacc and vfnmadd by using different operand orders to +; trigger commuting in TwoAddressInstructionPass. 
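(Editor's illustration, not part of the patch.) The vfnmadd/vfnmacc family corresponds to an fma whose multiplicand and addend are both negated. A minimal sketch of the IR shape these tests feed the selector, with the scalable vector types written out:

    %na = fneg <vscale x 1 x half> %a
    %nc = fneg <vscale x 1 x half> %c
    %r  = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %na,
                                                   <vscale x 1 x half> %b,
                                                   <vscale x 1 x half> %nc)
    ; expected to select vfnmadd.vv (vd[i] = -(vd[i] * vs1[i]) - vs2[i]) or the
    ; vfnmacc.vv form, depending on which operand ends up tied to vd

Varying which of %a/%b/%c arrives in v8 is what drives the commuting the comment above describes.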
+ +declare @llvm.fma.v1f16(, , ) + +define @vfnmsub_vv_nxv1f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vfnmadd.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %neg = fneg %va + %neg2 = fneg %vc + %vd = call @llvm.fma.v1f16( %neg, %vb, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv1f16( %va, %vb, half %c) { +; CHECK-LABEL: vfnmsub_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %neg2 = fneg %vb + %vd = call @llvm.fma.v1f16( %neg, %splat, %neg2) + ret %vd +} + +declare @llvm.fma.v2f16(, , ) + +define @vfnmsub_vv_nxv2f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vfnmadd.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %neg = fneg %va + %neg2 = fneg %vb + %vd = call @llvm.fma.v2f16( %neg, %vc, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv2f16( %va, %vb, half %c) { +; CHECK-LABEL: vfnmsub_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %neg2 = fneg %vb + %vd = call @llvm.fma.v2f16( %splat, %neg, %neg2) + ret %vd +} + +declare @llvm.fma.v4f16(, , ) + +define @vfnmsub_vv_nxv4f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vfnmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %neg = fneg %vb + %neg2 = fneg %vc + %vd = call @llvm.fma.v4f16( %neg, %va, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv4f16( %va, %vb, half %c) { +; CHECK-LABEL: vfnmsub_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %neg2 = fneg %vb + %vd = call @llvm.fma.v4f16( %va, %neg, %neg2) + ret %vd +} + +declare @llvm.fma.v8f16(, , ) + +define @vfnmsub_vv_nxv8f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: vfnmadd.vv v12, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %neg = fneg %vb + %neg2 = fneg %va + %vd = call @llvm.fma.v8f16( %neg, %vc, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv8f16( %va, %vb, half %c) { +; CHECK-LABEL: vfnmsub_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: vfnmadd.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %neg2 = fneg %va + %vd = call @llvm.fma.v8f16( %vb, %neg, %neg2) + ret %vd +} + +declare @llvm.fma.v16f16(, , ) + +define @vfnmsub_vv_nxv16f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vfnmadd.vv v8, v16, v12 +; CHECK-NEXT: ret + %neg = fneg %vc + %neg2 = fneg %vb + %vd = call @llvm.fma.v16f16( %neg, %va, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv16f16( %va, %vb, half %c) { +; CHECK-LABEL: 
vfnmsub_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %neg2 = fneg %vb + %vd = call @llvm.fma.v16f16( %neg, %va, %neg2) + ret %vd +} + +declare @llvm.fma.v32f16(, , ) + +define @vfnmsub_vv_nxv32f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: vfnmadd.vv v8, v24, v16 +; CHECK-NEXT: ret + %neg = fneg %vc + %neg2 = fneg %vb + %vd = call @llvm.fma.v32f16( %neg, %va, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv32f16( %va, %vb, half %c) { +; CHECK-LABEL: vfnmsub_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: vfnmadd.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %neg2 = fneg %va + %vd = call @llvm.fma.v32f16( %neg, %vb, %neg2) + ret %vd +} + +declare @llvm.fma.v1f32(, , ) + +define @vfnmsub_vv_nxv1f32( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vfnmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %neg = fneg %vb + %neg2 = fneg %vc + %vd = call @llvm.fma.v1f32( %va, %neg, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv1f32( %va, %vb, float %c) { +; CHECK-LABEL: vfnmsub_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %neg2 = fneg %vb + %vd = call @llvm.fma.v1f32( %neg, %splat, %neg2) + ret %vd +} + +declare @llvm.fma.v2f32(, , ) + +define @vfnmsub_vv_nxv2f32( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vfnmadd.vv v8, v10, v9 +; CHECK-NEXT: ret + %neg = fneg %vc + %neg2 = fneg %vb + %vd = call @llvm.fma.v2f32( %va, %neg, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv2f32( %va, %vb, float %c) { +; CHECK-LABEL: vfnmsub_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %neg2 = fneg %vb + %vd = call @llvm.fma.v2f32( %splat, %neg, %neg2) + ret %vd +} + +declare @llvm.fma.v4f32(, , ) + +define @vfnmsub_vv_nxv4f32( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vfnmadd.vv v10, v8, v12 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %neg = fneg %va + %neg2 = fneg %vc + %vd = call @llvm.fma.v4f32( %vb, %neg, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv4f32( %va, %vb, float %c) { +; CHECK-LABEL: vfnmsub_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %neg2 = fneg %vb + %vd = call @llvm.fma.v4f32( %va, %neg, %neg2) + ret %vd +} + +declare @llvm.fma.v8f32(, , ) + +define 
@vfnmsub_vv_nxv8f32( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: vfnmadd.vv v12, v16, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %neg = fneg %vc + %neg2 = fneg %va + %vd = call @llvm.fma.v8f32( %vb, %neg, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv8f32( %va, %vb, float %c) { +; CHECK-LABEL: vfnmsub_vf_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: vfnmadd.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %neg2 = fneg %va + %vd = call @llvm.fma.v8f32( %vb, %neg, %neg2) + ret %vd +} + +declare @llvm.fma.v16f32(, , ) + +define @vfnmsub_vv_nxv16f32( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vfnmadd.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %neg = fneg %va + %neg2 = fneg %vb + %vd = call @llvm.fma.v16f32( %vc, %neg, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv16f32( %va, %vb, float %c) { +; CHECK-LABEL: vfnmsub_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v16 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %neg2 = fneg %vb + %vd = call @llvm.fma.v16f32( %neg, %va, %neg2) + ret %vd +} + +declare @llvm.fma.v1f64(, , ) + +define @vfnmsub_vv_nxv1f64( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vfnmadd.vv v10, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %neg = fneg %vb + %neg2 = fneg %va + %vd = call @llvm.fma.v1f64( %vc, %neg, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv1f64( %va, %vb, double %c) { +; CHECK-LABEL: vfnmsub_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %neg2 = fneg %vb + %vd = call @llvm.fma.v1f64( %neg, %splat, %neg2) + ret %vd +} + +declare @llvm.fma.v2f64(, , ) + +define @vfnmsub_vv_nxv2f64( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vfnmadd.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %neg = fneg %va + %neg2 = fneg %vb + %vd = call @llvm.fma.v2f64( %neg, %vc, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv2f64( %va, %vb, double %c) { +; CHECK-LABEL: vfnmsub_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %neg2 = fneg %vb + %vd = call @llvm.fma.v2f64( %splat, %neg, %neg2) + ret %vd +} + +declare @llvm.fma.v4f64(, , ) + +define @vfnmsub_vv_nxv4f64( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vfnmadd.vv v8, v12, v16 +; CHECK-NEXT: ret + %neg = fneg %vb + %neg2 = fneg %vc + %vd = call @llvm.fma.v4f64( %neg, %va, %neg2) + ret %vd +} + 
+define @vfnmsub_vf_nxv4f64( %va, %vb, double %c) { +; CHECK-LABEL: vfnmsub_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %neg2 = fneg %vb + %vd = call @llvm.fma.v4f64( %va, %neg, %neg2) + ret %vd +} + +declare @llvm.fma.v8f64(, , ) + +define @vfnmsub_vv_nxv8f64( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; CHECK-NEXT: vfnmadd.vv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %neg = fneg %vb + %neg2 = fneg %va + %vd = call @llvm.fma.v8f64( %neg, %vc, %neg2) + ret %vd +} + +define @vfnmsub_vf_nxv8f64( %va, %vb, double %c) { +; CHECK-LABEL: vfnmsub_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; CHECK-NEXT: vfnmadd.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %neg2 = fneg %va + %vd = call @llvm.fma.v8f64( %vb, %neg, %neg2) + ret %vd +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll @@ -0,0 +1,417 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +; This tests a mix of vfnmsac and vfnmsub by using different operand orders to +; trigger commuting in TwoAddressInstructionPass. 
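(Editor's illustration, not part of the patch.) vfnmsub/vfnmsac negate only the product, so the corresponding IR negates a multiplicand but leaves the addend alone. A minimal sketch:

    %na = fneg <vscale x 1 x half> %a
    %r  = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %na,
                                                   <vscale x 1 x half> %b,
                                                   <vscale x 1 x half> %c)
    ; expected to select vfnmsub.vv (vd[i] = -(vd[i] * vs1[i]) + vs2[i]) or
    ; vfnmsac.vv, depending on which operand is tied to vd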
+ +declare @llvm.fma.v1f16(, , ) + +define @vfnmsub_vv_nxv1f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vfnmsub.vv v9, v8, v10 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %neg = fneg %va + %vd = call @llvm.fma.v1f16( %neg, %vb, %vc) + ret %vd +} + +define @vfnmsub_vf_nxv1f16( %va, %vb, half %c) { +; CHECK-LABEL: vfnmsub_vf_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf4,tu,mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v1f16( %neg, %splat, %vb) + ret %vd +} + +declare @llvm.fma.v2f16(, , ) + +define @vfnmsub_vv_nxv2f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vfnmsub.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %neg = fneg %va + %vd = call @llvm.fma.v2f16( %neg, %vc, %vb) + ret %vd +} + +define @vfnmsub_vf_nxv2f16( %va, %vb, half %c) { +; CHECK-LABEL: vfnmsub_vf_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,mf2,tu,mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v2f16( %splat, %neg, %vb) + ret %vd +} + +declare @llvm.fma.v4f16(, , ) + +define @vfnmsub_vv_nxv4f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vfnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %neg = fneg %vb + %vd = call @llvm.fma.v4f16( %neg, %va, %vc) + ret %vd +} + +define @vfnmsub_vf_nxv4f16( %va, %vb, half %c) { +; CHECK-LABEL: vfnmsub_vf_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m1,tu,mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %vd = call @llvm.fma.v4f16( %va, %neg, %vb) + ret %vd +} + +declare @llvm.fma.v8f16(, , ) + +define @vfnmsub_vv_nxv8f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: vfnmsub.vv v12, v10, v8 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %neg = fneg %vb + %vd = call @llvm.fma.v8f16( %neg, %vc, %va) + ret %vd +} + +define @vfnmsub_vf_nxv8f16( %va, %vb, half %c) { +; CHECK-LABEL: vfnmsub_vf_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m2,tu,mu +; CHECK-NEXT: vfnmsub.vf v10, fa0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %vd = call @llvm.fma.v8f16( %vb, %neg, %va) + ret %vd +} + +declare @llvm.fma.v16f16(, , ) + +define @vfnmsub_vv_nxv16f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vfnmsub.vv v8, v16, v12 +; CHECK-NEXT: ret + %neg = fneg %vc + %vd = call @llvm.fma.v16f16( %neg, %va, %vb) + ret %vd +} + +define @vfnmsub_vf_nxv16f16( %va, %vb, half %c) { +; CHECK-LABEL: vfnmsub_vf_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m4,tu,mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = 
shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %vd = call @llvm.fma.v16f16( %neg, %va, %vb) + ret %vd +} + +declare @llvm.fma.v32f16(, , ) + +define @vfnmsub_vv_nxv32f16( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16,m8,ta,mu +; CHECK-NEXT: vle16.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: vfnmsub.vv v8, v24, v16 +; CHECK-NEXT: ret + %neg = fneg %vc + %vd = call @llvm.fma.v32f16( %neg, %va, %vb) + ret %vd +} + +define @vfnmsub_vf_nxv32f16( %va, %vb, half %c) { +; CHECK-LABEL: vfnmsub_vf_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16,m8,tu,mu +; CHECK-NEXT: vfnmsub.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, half %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %vd = call @llvm.fma.v32f16( %neg, %vb, %va) + ret %vd +} + +declare @llvm.fma.v1f32(, , ) + +define @vfnmsub_vv_nxv1f32( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vfnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %neg = fneg %vb + %vd = call @llvm.fma.v1f32( %va, %neg, %vc) + ret %vd +} + +define @vfnmsub_vf_nxv1f32( %va, %vb, float %c) { +; CHECK-LABEL: vfnmsub_vf_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,mf2,tu,mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v1f32( %neg, %splat, %vb) + ret %vd +} + +declare @llvm.fma.v2f32(, , ) + +define @vfnmsub_vv_nxv2f32( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vfnmsub.vv v8, v10, v9 +; CHECK-NEXT: ret + %neg = fneg %vc + %vd = call @llvm.fma.v2f32( %va, %neg, %vb) + ret %vd +} + +define @vfnmsub_vf_nxv2f32( %va, %vb, float %c) { +; CHECK-LABEL: vfnmsub_vf_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m1,tu,mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v2f32( %splat, %neg, %vb) + ret %vd +} + +declare @llvm.fma.v4f32(, , ) + +define @vfnmsub_vv_nxv4f32( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vfnmsub.vv v10, v8, v12 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %neg = fneg %va + %vd = call @llvm.fma.v4f32( %vb, %neg, %vc) + ret %vd +} + +define @vfnmsub_vf_nxv4f32( %va, %vb, float %c) { +; CHECK-LABEL: vfnmsub_vf_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m2,tu,mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %vd = call @llvm.fma.v4f32( %va, %neg, %vb) + ret %vd +} + +declare @llvm.fma.v8f32(, , ) + +define @vfnmsub_vv_nxv8f32( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: vfnmsub.vv v12, v16, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %neg = fneg %vc + %vd = call @llvm.fma.v8f32( %vb, %neg, %va) + ret %vd +} + +define @vfnmsub_vf_nxv8f32( %va, %vb, float %c) { +; CHECK-LABEL: vfnmsub_vf_nxv8f32: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m4,tu,mu +; CHECK-NEXT: vfnmsub.vf v12, fa0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %vd = call @llvm.fma.v8f32( %vb, %neg, %va) + ret %vd +} + +declare @llvm.fma.v16f32(, , ) + +define @vfnmsub_vv_nxv16f32( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32,m8,ta,mu +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vfnmsub.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %neg = fneg %va + %vd = call @llvm.fma.v16f32( %vc, %neg, %vb) + ret %vd +} + +define @vfnmsub_vf_nxv16f32( %va, %vb, float %c) { +; CHECK-LABEL: vfnmsub_vf_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32,m8,tu,mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v16 +; CHECK-NEXT: ret + %head = insertelement undef, float %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %vd = call @llvm.fma.v16f32( %neg, %va, %vb) + ret %vd +} + +declare @llvm.fma.v1f64(, , ) + +define @vfnmsub_vv_nxv1f64( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vfnmsub.vv v10, v9, v8 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %neg = fneg %vb + %vd = call @llvm.fma.v1f64( %vc, %neg, %va) + ret %vd +} + +define @vfnmsub_vf_nxv1f64( %va, %vb, double %c) { +; CHECK-LABEL: vfnmsub_vf_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m1,tu,mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v1f64( %neg, %splat, %vb) + ret %vd +} + +declare @llvm.fma.v2f64(, , ) + +define @vfnmsub_vv_nxv2f64( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vfnmsub.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %neg = fneg %va + %vd = call @llvm.fma.v2f64( %neg, %vc, %vb) + ret %vd +} + +define @vfnmsub_vf_nxv2f64( %va, %vb, double %c) { +; CHECK-LABEL: vfnmsub_vf_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m2,tu,mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %va + %vd = call @llvm.fma.v2f64( %splat, %neg, %vb) + ret %vd +} + +declare @llvm.fma.v4f64(, , ) + +define @vfnmsub_vv_nxv4f64( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vfnmsub.vv v8, v12, v16 +; CHECK-NEXT: ret + %neg = fneg %vb + %vd = call @llvm.fma.v4f64( %neg, %va, %vc) + ret %vd +} + +define @vfnmsub_vf_nxv4f64( %va, %vb, double %c) { +; CHECK-LABEL: vfnmsub_vf_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m4,tu,mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %vd = call @llvm.fma.v4f64( %va, %neg, %vb) + ret %vd +} + +declare @llvm.fma.v8f64(, , ) + +define @vfnmsub_vv_nxv8f64( %va, %vb, %vc) { +; CHECK-LABEL: vfnmsub_vv_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu +; 
CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; CHECK-NEXT: vfnmsub.vv v24, v16, v8 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %neg = fneg %vb + %vd = call @llvm.fma.v8f64( %neg, %vc, %va) + ret %vd +} + +define @vfnmsub_vf_nxv8f64( %va, %vb, double %c) { +; CHECK-LABEL: vfnmsub_vf_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64,m8,tu,mu +; CHECK-NEXT: vfnmsub.vf v16, fa0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %head = insertelement undef, double %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %neg = fneg %splat + %vd = call @llvm.fma.v8f64( %vb, %neg, %va) + ret %vd +}