diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %m, i32 %vl)
+declare <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %m, i32 %vl)
+
+; (fadd (fmul x, y), z) -> (fma x, y, z)
+define <vscale x 1 x double> @fma(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: fma:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vfmul.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %1 = call fast <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %m, i32 %vl)
+  %2 = call fast <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %1, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 %vl)
+  ret <vscale x 1 x double> %2
+}
+
+; (fadd z, (fmul x, y)) -> (fma x, y, z)
+define <vscale x 1 x double> @fma_commute(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: fma_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vfmul.vv v8, v8, v9, v0.t
+; CHECK-NEXT:    vfadd.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    ret
+  %1 = call fast <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %m, i32 %vl)
+  %2 = call fast <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %z, <vscale x 1 x double> %1, <vscale x 1 x i1> %m, i32 %vl)
+  ret <vscale x 1 x double> %2
+}
+
+; Test the fmul operand with an all-true mask.
+define <vscale x 1 x double> @fma_true(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: fma_true:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vfmul.vv v8, v8, v9
+; CHECK-NEXT:    vfadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %true = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %1 = call fast <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %true, i32 %vl)
+  %2 = call fast <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %1, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 %vl)
+  ret <vscale x 1 x double> %2
+}
+
+; Test the fmul operand with a non-VP (normal) opcode.
+define <vscale x 1 x double> @fma_nonvp(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: fma_nonvp:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vfmul.vv v8, v8, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT:    vfadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %1 = fmul fast <vscale x 1 x double> %x, %y
+  %2 = call fast <vscale x 1 x double> @llvm.vp.fadd.nxv1f64(<vscale x 1 x double> %1, <vscale x 1 x double> %z, <vscale x 1 x i1> %m, i32 %vl)
+  ret <vscale x 1 x double> %2
+}