diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll
@@ -1,9 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+zfh,+experimental-zvfh -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zfh,+experimental-zvfh -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
+declare <7 x i1> @llvm.vp.fcmp.v7f16(<7 x half>, <7 x half>, metadata, <7 x i1>, i32)
+
+define <7 x i1> @fcmp_oeq_vv_v7f16(<7 x half> %va, <7 x half> %vb, <7 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_v7f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <7 x i1> @llvm.vp.fcmp.v7f16(<7 x half> %va, <7 x half> %vb, metadata !"oeq", <7 x i1> %m, i32 %evl)
+  ret <7 x i1> %v
+}
+
 declare <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half>, <8 x half>, metadata, <8 x i1>, i32)
 
 define <8 x i1> @fcmp_oeq_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) {
@@ -550,6 +562,69 @@
   ret <8 x i1> %v
 }
 
+declare <128 x i1> @llvm.vp.fcmp.v128f16(<128 x half>, <128 x half>, metadata, <128 x i1>, i32)
+
+define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_v128f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    vmv1r.v v1, v0
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    li a3, 0
+; CHECK-NEXT:    li a1, 64
+; CHECK-NEXT:    addi a4, a0, 128
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
+; CHECK-NEXT:    vle16.v v24, (a4)
+; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, mu
+; CHECK-NEXT:    addi a4, a2, -64
+; CHECK-NEXT:    vslidedown.vi v0, v0, 8
+; CHECK-NEXT:    bltu a2, a4, .LBB43_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a3, a4
+; CHECK-NEXT:  .LBB43_2:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
+; CHECK-NEXT:    vmfeq.vv v2, v16, v24, v0.t
+; CHECK-NEXT:    bltu a2, a1, .LBB43_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    li a2, 64
+; CHECK-NEXT:  .LBB43_4:
+; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v16, v24, v8, v0.t
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, tu, mu
+; CHECK-NEXT:    vslideup.vi v16, v2, 8
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <128 x i1> @llvm.vp.fcmp.v128f16(<128 x half> %va, <128 x half> %vb, metadata !"oeq", <128 x i1> %m, i32 %evl)
+  ret <128 x i1> %v
+}
+
+declare <7 x i1> @llvm.vp.fcmp.v7f64(<7 x double>, <7 x double>, metadata, <7 x i1>, i32)
+
+define <7 x i1> @fcmp_oeq_vv_v7f64(<7 x double> %va, <7 x double> %vb, <7 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_v7f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vmfeq.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    ret
+  %v = call <7 x i1> @llvm.vp.fcmp.v7f64(<7 x double> %va, <7 x double> %vb, metadata !"oeq", <7 x i1> %m, i32 %evl)
+  ret <7 x i1> %v
+}
+
 declare <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double>, <8 x double>, metadata, <8 x i1>, i32)
 
 define <8 x i1> @fcmp_oeq_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
@@ -1113,3 +1188,74 @@
   %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %vb, <8 x double> %va, metadata !"uno", <8 x i1> %m, i32 %evl)
   ret <8 x i1> %v
 }
+
+declare <32 x i1> @llvm.vp.fcmp.v32f64(<32 x double>, <32 x double>, metadata, <32 x i1>, i32)
+
+define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_v32f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    mul a1, a1, a3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    vmv1r.v v2, v0
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; CHECK-NEXT:    vslidedown.vi v0, v0, 2
+; CHECK-NEXT:    addi a1, a0, 128
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; CHECK-NEXT:    vle64.v v24, (a1)
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a3, a2, -16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    bltu a2, a3, .LBB87_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a1, a3
+; CHECK-NEXT:  .LBB87_2:
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v1, v16, v8, v0.t
+; CHECK-NEXT:    bltu a2, a0, .LBB87_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    li a2, 16
+; CHECK-NEXT:  .LBB87_4:
+; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v2
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v16, v24, v8, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, tu, mu
+; CHECK-NEXT:    vslideup.vi v16, v1, 2
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <32 x i1> @llvm.vp.fcmp.v32f64(<32 x double> %va, <32 x double> %vb, metadata !"oeq", <32 x i1> %m, i32 %evl)
+  ret <32 x i1> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+experimental-zvfh -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+zfh,+experimental-zvfh -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zfh,+experimental-zvfh -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.vp.fcmp.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, <vscale x 1 x i1>, i32)
@@ -550,6 +550,18 @@
   ret <vscale x 1 x i1> %v
 }
 
+declare <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f16(<vscale x 3 x half>, <vscale x 3 x half>, metadata, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3f16(<vscale x 3 x half> %va, <vscale x 3 x half> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_nxv3f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v9, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f16(<vscale x 3 x half> %va, <vscale x 3 x half> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i1> %v
+}
+
 declare <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, metadata, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
@@ -1114,6 +1126,58 @@
   ret <vscale x 8 x i1> %v
 }
 
+declare <vscale x 64 x i1> @llvm.vp.fcmp.nxv64f16(<vscale x 64 x half>, <vscale x 64 x half>, metadata, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscale x 64 x half> %vb, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_nxv64f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    vmv1r.v v1, v0
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    srli a1, a3, 1
+; CHECK-NEXT:    vsetvli a5, zero, e8, m1, ta, mu
+; CHECK-NEXT:    slli a5, a3, 3
+; CHECK-NEXT:    add a5, a0, a5
+; CHECK-NEXT:    vl8re16.v v24, (a5)
+; CHECK-NEXT:    slli a3, a3, 2
+; CHECK-NEXT:    sub a5, a2, a3
+; CHECK-NEXT:    vslidedown.vx v0, v0, a1
+; CHECK-NEXT:    bltu a2, a5, .LBB85_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a4, a5
+; CHECK-NEXT:  .LBB85_2:
+; CHECK-NEXT:    vl8re16.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
+; CHECK-NEXT:    vmfeq.vv v2, v16, v24, v0.t
+; CHECK-NEXT:    bltu a2, a3, .LBB85_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a2, a3
+; CHECK-NEXT:  .LBB85_4:
+; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v16, v24, v8, v0.t
+; CHECK-NEXT:    add a0, a1, a1
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v16, v2, a1
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i1> @llvm.vp.fcmp.nxv64f16(<vscale x 64 x half> %va, <vscale x 64 x half> %vb, metadata !"oeq", <vscale x 64 x i1> %m, i32 %evl)
+  ret <vscale x 64 x i1> %v
+}
+
 declare <vscale x 1 x i1> @llvm.vp.fcmp.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, metadata, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i1> @fcmp_oeq_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -1660,6 +1724,19 @@
   ret <vscale x 1 x i1> %v
 }
 
+declare <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f64(<vscale x 3 x double>, <vscale x 3 x double>, metadata, <vscale x 3 x i1>, i32)
+
+define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3f64(<vscale x 3 x double> %va, <vscale x 3 x double> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_nxv3f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT:    vmfeq.vv v16, v8, v12, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f64(<vscale x 3 x double> %va, <vscale x 3 x double> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl)
+  ret <vscale x 3 x i1> %v
+}
+
 declare <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, metadata, <vscale x 8 x i1>, i32)
 
 define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
@@ -2223,3 +2300,170 @@
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %va, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
 }
+
+declare <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f64(<vscale x 32 x double>, <vscale x 32 x double>, metadata, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vscale x 32 x double> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fcmp_oeq_vv_nxv32f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 5
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a3, 24
+; CHECK-NEXT:    mul a1, a1, a3
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    srli a1, a3, 3
+; CHECK-NEXT:    slli a5, a3, 3
+; CHECK-NEXT:    slli a7, a3, 1
+; CHECK-NEXT:    add a4, a2, a5
+; CHECK-NEXT:    mv t0, a6
+; CHECK-NEXT:    bltu a6, a7, .LBB171_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv t0, a7
+; CHECK-NEXT:  .LBB171_2:
+; CHECK-NEXT:    li t1, 0
+; CHECK-NEXT:    vsetvli t2, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vl8re64.v v16, (a4)
+; CHECK-NEXT:    srli a4, a3, 2
+; CHECK-NEXT:    sub t2, t0, a3
+; CHECK-NEXT:    vslidedown.vx v0, v24, a1
+; CHECK-NEXT:    bltu t0, t2, .LBB171_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv t1, t2
+; CHECK-NEXT:  .LBB171_4:
+; CHECK-NEXT:    li t2, 24
+; CHECK-NEXT:    vsetvli t3, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vslidedown.vx v1, v24, a4
+; CHECK-NEXT:    vl8re64.v v8, (a2)
+; CHECK-NEXT:    csrr t3, vlenb
+; CHECK-NEXT:    slli t3, t3, 3
+; CHECK-NEXT:    add t3, sp, t3
+; CHECK-NEXT:    addi t3, t3, 16
+; CHECK-NEXT:    vs8r.v v8, (t3) # Unknown-size Folded Spill
+; CHECK-NEXT:    slli t3, a3, 4
+; CHECK-NEXT:    vsetvli zero, t1, e64, m8, ta, ma
+; CHECK-NEXT:    csrr t1, vlenb
+; CHECK-NEXT:    slli t1, t1, 4
+; CHECK-NEXT:    add t1, sp, t1
+; CHECK-NEXT:    addi t1, t1, 16
+; CHECK-NEXT:    vl8re8.v v8, (t1) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v2, v8, v16, v0.t
+; CHECK-NEXT:    bltu t0, a3, .LBB171_6
+; CHECK-NEXT:  # %bb.5:
+; CHECK-NEXT:    mv t0, a3
+; CHECK-NEXT:  .LBB171_6:
+; CHECK-NEXT:    li t1, 0
+; CHECK-NEXT:    mul t4, a3, t2
+; CHECK-NEXT:    add t2, a2, t3
+; CHECK-NEXT:    vsetvli zero, t0, e64, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    csrr t0, vlenb
+; CHECK-NEXT:    li t3, 24
+; CHECK-NEXT:    mul t0, t0, t3
+; CHECK-NEXT:    add t0, sp, t0
+; CHECK-NEXT:    addi t0, t0, 16
+; CHECK-NEXT:    vl8re8.v v24, (t0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr t0, vlenb
+; CHECK-NEXT:    slli t0, t0, 3
+; CHECK-NEXT:    add t0, sp, t0
+; CHECK-NEXT:    addi t0, t0, 16
+; CHECK-NEXT:    vl8re8.v v8, (t0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v17, v24, v8, v0.t
+; CHECK-NEXT:    sub t0, a6, a7
+; CHECK-NEXT:    add a7, a1, a1
+; CHECK-NEXT:    bltu a6, t0, .LBB171_8
+; CHECK-NEXT:  # %bb.7:
+; CHECK-NEXT:    mv t1, t0
+; CHECK-NEXT:  .LBB171_8:
+; CHECK-NEXT:    add a2, a2, t4
+; CHECK-NEXT:    vl8re64.v v8, (t2)
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    li t0, 24
+; CHECK-NEXT:    mul a6, a6, t0
+; CHECK-NEXT:    add a6, sp, a6
+; CHECK-NEXT:    addi a6, a6, 16
+; CHECK-NEXT:    vs8r.v v8, (a6) # Unknown-size Folded Spill
+; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    slli a6, a6, 4
+; CHECK-NEXT:    add a6, sp, a6
+; CHECK-NEXT:    addi a6, a6, 16
+; CHECK-NEXT:    vs8r.v v8, (a6) # Unknown-size Folded Spill
+; CHECK-NEXT:    add a0, a0, a5
+; CHECK-NEXT:    vsetvli zero, a7, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vx v17, v2, a1
+; CHECK-NEXT:    mv a5, t1
+; CHECK-NEXT:    bltu t1, a3, .LBB171_10
+; CHECK-NEXT:  # %bb.9:
+; CHECK-NEXT:    mv a5, a3
+; CHECK-NEXT:  .LBB171_10:
+; CHECK-NEXT:    li a6, 0
+; CHECK-NEXT:    vsetvli a7, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vslidedown.vx v16, v1, a1
+; CHECK-NEXT:    vl8re64.v v8, (a2)
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vsetvli zero, a5, e64, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v1
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a2, 24
+; CHECK-NEXT:    mul a0, a0, a2
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v18, v8, v24, v0.t
+; CHECK-NEXT:    add a0, a4, a1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    sub a0, t1, a3
+; CHECK-NEXT:    vslideup.vx v17, v18, a4
+; CHECK-NEXT:    bltu t1, a0, .LBB171_12
+; CHECK-NEXT:  # %bb.11:
+; CHECK-NEXT:    mv a6, a0
+; CHECK-NEXT:  .LBB171_12:
+; CHECK-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vmfeq.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    slli a0, a1, 1
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    add a1, a0, a1
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT:    vslideup.vx v17, v16, a0
+; CHECK-NEXT:    vmv1r.v v0, v17
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 5
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i1> @llvm.vp.fcmp.nxv32f64(<vscale x 32 x double> %va, <vscale x 32 x double> %vb, metadata !"oeq", <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i1> %v
+}