diff --git a/llvm/test/CodeGen/RISCV/double-imm.ll b/llvm/test/CodeGen/RISCV/double-imm.ll
--- a/llvm/test/CodeGen/RISCV/double-imm.ll
+++ b/llvm/test/CodeGen/RISCV/double-imm.ll
@@ -1,22 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefix=RV32IFD %s
+; RUN: -target-abi=ilp32d | FileCheck -check-prefix=RV32IFD %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefix=RV64IFD %s
+; RUN: -target-abi=lp64d | FileCheck -check-prefix=RV64IFD %s
 
 define double @double_imm() nounwind {
 ; RV32IFD-LABEL: double_imm:
 ; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: lui a0, 345155
-; RV32IFD-NEXT: addi a0, a0, -744
-; RV32IFD-NEXT: lui a1, 262290
-; RV32IFD-NEXT: addi a1, a1, 507
+; RV32IFD-NEXT: lui a0, %hi(.LCPI0_0)
+; RV32IFD-NEXT: fld fa0, %lo(.LCPI0_0)(a0)
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: double_imm:
 ; RV64IFD: # %bb.0:
 ; RV64IFD-NEXT: lui a0, %hi(.LCPI0_0)
-; RV64IFD-NEXT: ld a0, %lo(.LCPI0_0)(a0)
+; RV64IFD-NEXT: fld fa0, %lo(.LCPI0_0)(a0)
 ; RV64IFD-NEXT: ret
 ret double 3.1415926535897931159979634685441851615905761718750
 }
@@ -24,26 +22,16 @@
 define double @double_imm_op(double %a) nounwind {
 ; RV32IFD-LABEL: double_imm_op:
 ; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
 ; RV32IFD-NEXT: lui a0, %hi(.LCPI1_0)
-; RV32IFD-NEXT: fld ft1, %lo(.LCPI1_0)(a0)
-; RV32IFD-NEXT: fadd.d ft0, ft0, ft1
-; RV32IFD-NEXT: fsd ft0, 8(sp)
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: fld ft0, %lo(.LCPI1_0)(a0)
+; RV32IFD-NEXT: fadd.d fa0, fa0, ft0
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: double_imm_op:
 ; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: lui a1, %hi(.LCPI1_0)
-; RV64IFD-NEXT: fld ft0, %lo(.LCPI1_0)(a1)
-; RV64IFD-NEXT: fmv.d.x ft1, a0
-; RV64IFD-NEXT: fadd.d ft0, ft1, ft0
-; RV64IFD-NEXT: fmv.x.d a0, ft0
+; RV64IFD-NEXT: lui a0, %hi(.LCPI1_0)
+; RV64IFD-NEXT: fld ft0, %lo(.LCPI1_0)(a0)
+; RV64IFD-NEXT: fadd.d fa0, fa0, ft0
 ; RV64IFD-NEXT: ret
 %1 = fadd double %a, 1.0
 ret double %1
diff --git a/llvm/test/CodeGen/RISCV/double-mem.ll b/llvm/test/CodeGen/RISCV/double-mem.ll
--- a/llvm/test/CodeGen/RISCV/double-mem.ll
+++ b/llvm/test/CodeGen/RISCV/double-mem.ll
@@ -1,28 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefix=RV32IFD %s
+; RUN: -target-abi=ilp32d | FileCheck -check-prefix=RV32IFD %s
 ; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefix=RV64IFD %s
+; RUN: -target-abi=lp64d | FileCheck -check-prefix=RV64IFD %s
 
 define dso_local double @fld(double *%a) nounwind {
 ; RV32IFD-LABEL: fld:
 ; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
 ; RV32IFD-NEXT: fld ft0, 0(a0)
 ; RV32IFD-NEXT: fld ft1, 24(a0)
-; RV32IFD-NEXT: fadd.d ft0, ft0, ft1
-; RV32IFD-NEXT: fsd ft0, 8(sp)
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: fadd.d fa0, ft0, ft1
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: fld:
 ; RV64IFD: # %bb.0:
 ; RV64IFD-NEXT: fld ft0, 0(a0)
 ; RV64IFD-NEXT: fld ft1, 24(a0)
-; RV64IFD-NEXT: fadd.d ft0, ft0, ft1
-; RV64IFD-NEXT: fmv.x.d a0, ft0
+; RV64IFD-NEXT: fadd.d fa0, ft0, ft1
 ; RV64IFD-NEXT: ret
 %1 = load double, double* %a
 %2 = getelementptr double, double* %a, i32 3
@@ -36,24 +30,14 @@
 define dso_local void @fsd(double *%a, double %b, double %c) nounwind {
 ; RV32IFD-LABEL: fsd:
 ; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw a3, 8(sp)
-; RV32IFD-NEXT: sw a4, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: sw a1, 8(sp)
-; RV32IFD-NEXT: sw a2, 12(sp)
-; RV32IFD-NEXT: fld ft1, 8(sp)
-; RV32IFD-NEXT: fadd.d ft0, ft1, ft0
+; RV32IFD-NEXT: fadd.d ft0, fa0, fa1
 ; RV32IFD-NEXT: fsd ft0, 0(a0)
 ; RV32IFD-NEXT: fsd ft0, 64(a0)
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: fsd:
 ; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: fmv.d.x ft0, a2
-; RV64IFD-NEXT: fmv.d.x ft1, a1
-; RV64IFD-NEXT: fadd.d ft0, ft1, ft0
+; RV64IFD-NEXT: fadd.d ft0, fa0, fa1
 ; RV64IFD-NEXT: fsd ft0, 0(a0)
 ; RV64IFD-NEXT: fsd ft0, 64(a0)
 ; RV64IFD-NEXT: ret
@@ -72,38 +56,24 @@
 define dso_local double @fld_fsd_global(double %a, double %b) nounwind {
 ; RV32IFD-LABEL: fld_fsd_global:
 ; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw a2, 8(sp)
-; RV32IFD-NEXT: sw a3, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft1, 8(sp)
-; RV32IFD-NEXT: fadd.d ft0, ft1, ft0
+; RV32IFD-NEXT: fadd.d fa0, fa0, fa1
 ; RV32IFD-NEXT: lui a0, %hi(G)
-; RV32IFD-NEXT: fld ft1, %lo(G)(a0)
-; RV32IFD-NEXT: fsd ft0, %lo(G)(a0)
+; RV32IFD-NEXT: fld ft0, %lo(G)(a0)
+; RV32IFD-NEXT: fsd fa0, %lo(G)(a0)
 ; RV32IFD-NEXT: addi a0, a0, %lo(G)
-; RV32IFD-NEXT: fld ft1, 72(a0)
-; RV32IFD-NEXT: fsd ft0, 72(a0)
-; RV32IFD-NEXT: fsd ft0, 8(sp)
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: fld ft0, 72(a0)
+; RV32IFD-NEXT: fsd fa0, 72(a0)
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: fld_fsd_global:
 ; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: fmv.d.x ft0, a1
-; RV64IFD-NEXT: fmv.d.x ft1, a0
-; RV64IFD-NEXT: fadd.d ft0, ft1, ft0
+; RV64IFD-NEXT: fadd.d fa0, fa0, fa1
 ; RV64IFD-NEXT: lui a0, %hi(G)
-; RV64IFD-NEXT: fld ft1, %lo(G)(a0)
-; RV64IFD-NEXT: fsd ft0, %lo(G)(a0)
-; RV64IFD-NEXT: addi a1, a0, %lo(G)
-; RV64IFD-NEXT: fld ft1, 72(a1)
-; RV64IFD-NEXT: fmv.x.d a0, ft0
-; RV64IFD-NEXT: fsd ft0, 72(a1)
+; RV64IFD-NEXT: fld ft0, %lo(G)(a0)
+; RV64IFD-NEXT: fsd fa0, %lo(G)(a0)
+; RV64IFD-NEXT: addi a0, a0, %lo(G)
+; RV64IFD-NEXT: fld ft0, 72(a0)
+; RV64IFD-NEXT: fsd fa0, 72(a0)
 ; RV64IFD-NEXT: ret
 ; Use %a and %b in an FP op to ensure floating point registers are used, even
 ; for the soft float ABI
@@ -120,29 +90,19 @@
 define dso_local double @fld_fsd_constant(double %a) nounwind {
 ; RV32IFD-LABEL: fld_fsd_constant:
 ; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
 ; RV32IFD-NEXT: lui a0, 912092
-; RV32IFD-NEXT: fld ft1, -273(a0)
-; RV32IFD-NEXT: fadd.d ft0, ft0, ft1
-; RV32IFD-NEXT: fsd ft0, -273(a0)
-; RV32IFD-NEXT: fsd ft0, 8(sp)
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: lw a1, 12(sp)
-; RV32IFD-NEXT: addi sp, sp, 16
+; RV32IFD-NEXT: fld ft0, -273(a0)
+; RV32IFD-NEXT: fadd.d fa0, fa0, ft0
+; RV32IFD-NEXT: fsd fa0, -273(a0)
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: fld_fsd_constant:
 ; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: lui a1, 228023
-; RV64IFD-NEXT: slli a1, a1, 2
-; RV64IFD-NEXT: fld ft0, -273(a1)
-; RV64IFD-NEXT: fmv.d.x ft1, a0
-; RV64IFD-NEXT: fadd.d ft0, ft1, ft0
-; RV64IFD-NEXT: fmv.x.d a0, ft0
-; RV64IFD-NEXT: fsd ft0, -273(a1)
+; RV64IFD-NEXT: lui a0, 228023
+; RV64IFD-NEXT: slli a0, a0, 2
+; RV64IFD-NEXT: fld ft0, -273(a0)
+; RV64IFD-NEXT: fadd.d fa0, fa0, ft0
+; RV64IFD-NEXT: fsd fa0, -273(a0)
 ; RV64IFD-NEXT: ret
 %1 = inttoptr i32 3735928559 to double*
 %2 = load volatile double, double* %1
@@ -158,19 +118,14 @@
 ; RV32IFD: # %bb.0:
 ; RV32IFD-NEXT: addi sp, sp, -32
 ; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: fsd ft0, 0(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: addi a0, sp, 16
+; RV32IFD-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fmv.d fs0, fa0
+; RV32IFD-NEXT: addi a0, sp, 8
 ; RV32IFD-NEXT: call notdead@plt
-; RV32IFD-NEXT: fld ft0, 16(sp)
-; RV32IFD-NEXT: fld ft1, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: fadd.d ft0, ft0, ft1
-; RV32IFD-NEXT: fsd ft0, 8(sp)
-; RV32IFD-NEXT: lw a0, 8(sp)
-; RV32IFD-NEXT: lw a1, 12(sp)
+; RV32IFD-NEXT: fld ft0, 8(sp)
+; RV32IFD-NEXT: fadd.d fa0, ft0, fs0
 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
 ; RV32IFD-NEXT: addi sp, sp, 32
 ; RV32IFD-NEXT: ret
 ;
@@ -178,15 +133,14 @@
 ; RV64IFD: # %bb.0:
 ; RV64IFD-NEXT: addi sp, sp, -32
 ; RV64IFD-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fsd ft0, 8(sp) # 8-byte Folded Spill
-; RV64IFD-NEXT: addi a0, sp, 16
+; RV64IFD-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
+; RV64IFD-NEXT: fmv.d fs0, fa0
+; RV64IFD-NEXT: addi a0, sp, 8
 ; RV64IFD-NEXT: call notdead@plt
-; RV64IFD-NEXT: fld ft0, 16(sp)
-; RV64IFD-NEXT: fld ft1, 8(sp) # 8-byte Folded Reload
-; RV64IFD-NEXT: fadd.d ft0, ft0, ft1
-; RV64IFD-NEXT: fmv.x.d a0, ft0
+; RV64IFD-NEXT: fld ft0, 8(sp)
+; RV64IFD-NEXT: fadd.d fa0, ft0, fs0
 ; RV64IFD-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
 ; RV64IFD-NEXT: addi sp, sp, 32
 ; RV64IFD-NEXT: ret
 %1 = alloca double, align 8
@@ -200,29 +154,21 @@
 define dso_local void @fsd_stack(double %a, double %b) nounwind {
 ; RV32IFD-LABEL: fsd_stack:
 ; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -32
-; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw a2, 8(sp)
-; RV32IFD-NEXT: sw a3, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft1, 8(sp)
-; RV32IFD-NEXT: fadd.d ft0, ft1, ft0
-; RV32IFD-NEXT: fsd ft0, 16(sp)
-; RV32IFD-NEXT: addi a0, sp, 16
+; RV32IFD-NEXT: addi sp, sp, -16
+; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: fadd.d ft0, fa0, fa1
+; RV32IFD-NEXT: fsd ft0, 0(sp)
+; RV32IFD-NEXT: mv a0, sp
 ; RV32IFD-NEXT: call notdead@plt
-; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 32
+; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: fsd_stack:
 ; RV64IFD: # %bb.0:
 ; RV64IFD-NEXT: addi sp, sp, -16
 ; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IFD-NEXT: fmv.d.x ft0, a1
-; RV64IFD-NEXT: fmv.d.x ft1, a0
-; RV64IFD-NEXT: fadd.d ft0, ft1, ft0
+; RV64IFD-NEXT: fadd.d ft0, fa0, fa1
 ; RV64IFD-NEXT: fsd ft0, 0(sp)
 ; RV64IFD-NEXT: mv a0, sp
 ; RV64IFD-NEXT: call notdead@plt
@@ -241,19 +187,13 @@
 define dso_local void @fsd_trunc(float* %a, double %b) nounwind noinline optnone {
 ; RV32IFD-LABEL: fsd_trunc:
 ; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: sw a1, 8(sp)
-; RV32IFD-NEXT: sw a2, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: fcvt.s.d ft0, ft0
+; RV32IFD-NEXT: fcvt.s.d ft0, fa0
 ; RV32IFD-NEXT: fsw ft0, 0(a0)
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: fsd_trunc:
 ; RV64IFD: # %bb.0:
-; RV64IFD-NEXT: fmv.d.x ft0, a1
-; RV64IFD-NEXT: fcvt.s.d ft0, ft0
+; RV64IFD-NEXT: fcvt.s.d ft0, fa0
 ; RV64IFD-NEXT: fsw ft0, 0(a0)
 ; RV64IFD-NEXT: ret
 %1 = fptrunc double %b to float
diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll
--- a/llvm/test/CodeGen/RISCV/float-imm.ll
+++ b/llvm/test/CodeGen/RISCV/float-imm.ll
@@ -1,22 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefix=RV32IF %s
+; RUN: -target-abi=ilp32f | FileCheck -check-prefix=RV32IF %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefix=RV64IF %s
+; RUN: -target-abi=lp64f | FileCheck -check-prefix=RV64IF %s
 
 ; TODO: constant pool shouldn't be necessary for RV64IF.
 define float @float_imm() nounwind {
 ; RV32IF-LABEL: float_imm:
 ; RV32IF: # %bb.0:
-; RV32IF-NEXT: lui a0, 263313
-; RV32IF-NEXT: addi a0, a0, -37
+; RV32IF-NEXT: lui a0, %hi(.LCPI0_0)
+; RV32IF-NEXT: flw fa0, %lo(.LCPI0_0)(a0)
 ; RV32IF-NEXT: ret
 ;
 ; RV64IF-LABEL: float_imm:
 ; RV64IF: # %bb.0:
 ; RV64IF-NEXT: lui a0, %hi(.LCPI0_0)
-; RV64IF-NEXT: flw ft0, %lo(.LCPI0_0)(a0)
-; RV64IF-NEXT: fmv.x.w a0, ft0
+; RV64IF-NEXT: flw fa0, %lo(.LCPI0_0)(a0)
 ; RV64IF-NEXT: ret
 ret float 3.14159274101257324218750
 }
@@ -24,20 +23,16 @@
 define float @float_imm_op(float %a) nounwind {
 ; RV32IF-LABEL: float_imm_op:
 ; RV32IF: # %bb.0:
-; RV32IF-NEXT: lui a1, %hi(.LCPI1_0)
-; RV32IF-NEXT: flw ft0, %lo(.LCPI1_0)(a1)
-; RV32IF-NEXT: fmv.w.x ft1, a0
-; RV32IF-NEXT: fadd.s ft0, ft1, ft0
-; RV32IF-NEXT: fmv.x.w a0, ft0
+; RV32IF-NEXT: lui a0, %hi(.LCPI1_0)
+; RV32IF-NEXT: flw ft0, %lo(.LCPI1_0)(a0)
+; RV32IF-NEXT: fadd.s fa0, fa0, ft0
 ; RV32IF-NEXT: ret
 ;
 ; RV64IF-LABEL: float_imm_op:
 ; RV64IF: # %bb.0:
-; RV64IF-NEXT: lui a1, %hi(.LCPI1_0)
-; RV64IF-NEXT: flw ft0, %lo(.LCPI1_0)(a1)
-; RV64IF-NEXT: fmv.w.x ft1, a0
-; RV64IF-NEXT: fadd.s ft0, ft1, ft0
-; RV64IF-NEXT: fmv.x.w a0, ft0
+; RV64IF-NEXT: lui a0, %hi(.LCPI1_0)
+; RV64IF-NEXT: flw ft0, %lo(.LCPI1_0)(a0)
+; RV64IF-NEXT: fadd.s fa0, fa0, ft0
 ; RV64IF-NEXT: ret
 %1 = fadd float %a, 1.0
 ret float %1
diff --git a/llvm/test/CodeGen/RISCV/float-mem.ll b/llvm/test/CodeGen/RISCV/float-mem.ll
--- a/llvm/test/CodeGen/RISCV/float-mem.ll
+++ b/llvm/test/CodeGen/RISCV/float-mem.ll
@@ -1,24 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefix=RV32IF %s
+; RUN: -target-abi=ilp32f | FileCheck -check-prefix=RV32IF %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefix=RV64IF %s
+; RUN: -target-abi=lp64f | FileCheck -check-prefix=RV64IF %s
 
 define dso_local float @flw(float *%a) nounwind {
 ; RV32IF-LABEL: flw:
 ; RV32IF: # %bb.0:
 ; RV32IF-NEXT: flw ft0, 0(a0)
 ; RV32IF-NEXT: flw ft1, 12(a0)
-; RV32IF-NEXT: fadd.s ft0, ft0, ft1
-; RV32IF-NEXT: fmv.x.w a0, ft0
+; RV32IF-NEXT: fadd.s fa0, ft0, ft1
 ; RV32IF-NEXT: ret
 ;
 ; RV64IF-LABEL: flw:
 ; RV64IF: # %bb.0:
 ; RV64IF-NEXT: flw ft0, 0(a0)
 ; RV64IF-NEXT: flw ft1, 12(a0)
-; RV64IF-NEXT: fadd.s ft0, ft0, ft1
-; RV64IF-NEXT: fmv.x.w a0, ft0
+; RV64IF-NEXT: fadd.s fa0, ft0, ft1
 ; RV64IF-NEXT: ret
 %1 = load float, float* %a
 %2 = getelementptr float, float* %a, i32 3
@@ -34,18 +32,14 @@
 ; for the soft float ABI
 ; RV32IF-LABEL: fsw:
 ; RV32IF: # %bb.0:
-; RV32IF-NEXT: fmv.w.x ft0, a2
-; RV32IF-NEXT: fmv.w.x ft1, a1
-; RV32IF-NEXT: fadd.s ft0, ft1, ft0
+; RV32IF-NEXT: fadd.s ft0, fa0, fa1
 ; RV32IF-NEXT: fsw ft0, 0(a0)
 ; RV32IF-NEXT: fsw ft0, 32(a0)
 ; RV32IF-NEXT: ret
 ;
 ; RV64IF-LABEL: fsw:
 ; RV64IF: # %bb.0:
-; RV64IF-NEXT: fmv.w.x ft0, a2
-; RV64IF-NEXT: fmv.w.x ft1, a1
-; RV64IF-NEXT: fadd.s ft0, ft1, ft0
+; RV64IF-NEXT: fadd.s ft0, fa0, fa1
 ; RV64IF-NEXT: fsw ft0, 0(a0)
 ; RV64IF-NEXT: fsw ft0, 32(a0)
 ; RV64IF-NEXT: ret
@@ -64,30 +58,24 @@
 ; for the soft float ABI
 ; RV32IF-LABEL: flw_fsw_global:
 ; RV32IF: # %bb.0:
-; RV32IF-NEXT: fmv.w.x ft0, a1
-; RV32IF-NEXT: fmv.w.x ft1, a0
-; RV32IF-NEXT: fadd.s ft0, ft1, ft0
+; RV32IF-NEXT: fadd.s fa0, fa0, fa1
 ; RV32IF-NEXT: lui a0, %hi(G)
-; RV32IF-NEXT: flw ft1, %lo(G)(a0)
-; RV32IF-NEXT: fsw ft0, %lo(G)(a0)
-; RV32IF-NEXT: addi a1, a0, %lo(G)
-; RV32IF-NEXT: flw ft1, 36(a1)
-; RV32IF-NEXT: fmv.x.w a0, ft0
-; RV32IF-NEXT: fsw ft0, 36(a1)
+; RV32IF-NEXT: flw ft0, %lo(G)(a0)
+; RV32IF-NEXT: fsw fa0, %lo(G)(a0)
+; RV32IF-NEXT: addi a0, a0, %lo(G)
+; RV32IF-NEXT: flw ft0, 36(a0)
+; RV32IF-NEXT: fsw fa0, 36(a0)
 ; RV32IF-NEXT: ret
 ;
 ; RV64IF-LABEL: flw_fsw_global:
 ; RV64IF: # %bb.0:
-; RV64IF-NEXT: fmv.w.x ft0, a1
-; RV64IF-NEXT: fmv.w.x ft1, a0
-; RV64IF-NEXT: fadd.s ft0, ft1, ft0
+; RV64IF-NEXT: fadd.s fa0, fa0, fa1
 ; RV64IF-NEXT: lui a0, %hi(G)
-; RV64IF-NEXT: flw ft1, %lo(G)(a0)
-; RV64IF-NEXT: fsw ft0, %lo(G)(a0)
-; RV64IF-NEXT: addi a1, a0, %lo(G)
-; RV64IF-NEXT: flw ft1, 36(a1)
-; RV64IF-NEXT: fmv.x.w a0, ft0
-; RV64IF-NEXT: fsw ft0, 36(a1)
+; RV64IF-NEXT: flw ft0, %lo(G)(a0)
+; RV64IF-NEXT: fsw fa0, %lo(G)(a0)
+; RV64IF-NEXT: addi a0, a0, %lo(G)
+; RV64IF-NEXT: flw ft0, 36(a0)
+; RV64IF-NEXT: fsw fa0, 36(a0)
 ; RV64IF-NEXT: ret
 %1 = fadd float %a, %b
 %2 = load volatile float, float* @G
@@ -102,23 +90,19 @@
 define dso_local float @flw_fsw_constant(float %a) nounwind {
 ; RV32IF-LABEL: flw_fsw_constant:
 ; RV32IF: # %bb.0:
-; RV32IF-NEXT: lui a1, 912092
-; RV32IF-NEXT: flw ft0, -273(a1)
-; RV32IF-NEXT: fmv.w.x ft1, a0
-; RV32IF-NEXT: fadd.s ft0, ft1, ft0
-; RV32IF-NEXT: fmv.x.w a0, ft0
-; RV32IF-NEXT: fsw ft0, -273(a1)
+; RV32IF-NEXT: lui a0, 912092
+; RV32IF-NEXT: flw ft0, -273(a0)
+; RV32IF-NEXT: fadd.s fa0, fa0, ft0
+; RV32IF-NEXT: fsw fa0, -273(a0)
 ; RV32IF-NEXT: ret
 ;
 ; RV64IF-LABEL: flw_fsw_constant:
 ; RV64IF: # %bb.0:
-; RV64IF-NEXT: lui a1, 228023
-; RV64IF-NEXT: slli a1, a1, 2
-; RV64IF-NEXT: flw ft0, -273(a1)
-; RV64IF-NEXT: fmv.w.x ft1, a0
-; RV64IF-NEXT: fadd.s ft0, ft1, ft0
-; RV64IF-NEXT: fmv.x.w a0, ft0
-; RV64IF-NEXT: fsw ft0, -273(a1)
+; RV64IF-NEXT: lui a0, 228023
+; RV64IF-NEXT: slli a0, a0, 2
+; RV64IF-NEXT: flw ft0, -273(a0)
+; RV64IF-NEXT: fadd.s fa0, fa0, ft0
+; RV64IF-NEXT: fsw fa0, -273(a0)
 ; RV64IF-NEXT: ret
 %1 = inttoptr i32 3735928559 to float*
 %2 = load volatile float, float* %1
@@ -134,15 +118,14 @@
 ; RV32IF: # %bb.0:
 ; RV32IF-NEXT: addi sp, sp, -16
 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fmv.w.x ft0, a0
-; RV32IF-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: addi a0, sp, 8
+; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: fmv.s fs0, fa0
+; RV32IF-NEXT: addi a0, sp, 4
 ; RV32IF-NEXT: call notdead@plt
-; RV32IF-NEXT: flw ft0, 8(sp)
-; RV32IF-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload
-; RV32IF-NEXT: fadd.s ft0, ft0, ft1
-; RV32IF-NEXT: fmv.x.w a0, ft0
+; RV32IF-NEXT: flw ft0, 4(sp)
+; RV32IF-NEXT: fadd.s fa0, ft0, fs0
 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT: addi sp, sp, 16
 ; RV32IF-NEXT: ret
 ;
@@ -150,15 +133,14 @@
 ; RV64IF: # %bb.0:
 ; RV64IF-NEXT: addi sp, sp, -16
 ; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IF-NEXT: fmv.w.x ft0, a0
-; RV64IF-NEXT: fsw ft0, 0(sp) # 4-byte Folded Spill
-; RV64IF-NEXT: addi a0, sp, 4
+; RV64IF-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV64IF-NEXT: fmv.s fs0, fa0
+; RV64IF-NEXT: mv a0, sp
 ; RV64IF-NEXT: call notdead@plt
-; RV64IF-NEXT: flw ft0, 4(sp)
-; RV64IF-NEXT: flw ft1, 0(sp) # 4-byte Folded Reload
-; RV64IF-NEXT: fadd.s ft0, ft0, ft1
-; RV64IF-NEXT: fmv.x.w a0, ft0
+; RV64IF-NEXT: flw ft0, 0(sp)
+; RV64IF-NEXT: fadd.s fa0, ft0, fs0
 ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
 ; RV64IF-NEXT: addi sp, sp, 16
 ; RV64IF-NEXT: ret
 %1 = alloca float, align 4
@@ -174,9 +156,7 @@
 ; RV32IF: # %bb.0:
 ; RV32IF-NEXT: addi sp, sp, -16
 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32IF-NEXT: fmv.w.x ft0, a1
-; RV32IF-NEXT: fmv.w.x ft1, a0
-; RV32IF-NEXT: fadd.s ft0, ft1, ft0
+; RV32IF-NEXT: fadd.s ft0, fa0, fa1
 ; RV32IF-NEXT: fsw ft0, 8(sp)
 ; RV32IF-NEXT: addi a0, sp, 8
 ; RV32IF-NEXT: call notdead@plt
@@ -188,9 +168,7 @@
 ; RV64IF: # %bb.0:
 ; RV64IF-NEXT: addi sp, sp, -16
 ; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IF-NEXT: fmv.w.x ft0, a1
-; RV64IF-NEXT: fmv.w.x ft1, a0
-; RV64IF-NEXT: fadd.s ft0, ft1, ft0
+; RV64IF-NEXT: fadd.s ft0, fa0, fa1
 ; RV64IF-NEXT: fsw ft0, 4(sp)
 ; RV64IF-NEXT: addi a0, sp, 4
 ; RV64IF-NEXT: call notdead@plt
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=RV32,RV32IF %s
+; RUN: -target-abi=ilp32f | FileCheck -check-prefixes=RV32,RV32IF %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=RV64,RV64IF %s
+; RUN: -target-abi=lp64f | FileCheck -check-prefixes=RV64,RV64IF %s
 ; RUN: llc -mtriple=riscv32 -mattr=+f,+d -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=RV32,RV32IFD %s
+; RUN: -target-abi=ilp32d | FileCheck -check-prefixes=RV32,RV32IFD %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f,+d -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=RV64,RV64IFD %s
+; RUN: -target-abi=lp64d | FileCheck -check-prefixes=RV64,RV64IFD %s
 
 ; i32 saturate
 
@@ -71,23 +71,16 @@
 ;
 ; RV32IFD-LABEL: stest_f64i32:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: feq.d a0, ft0, ft0
+; RV32IFD-NEXT: feq.d a0, fa0, fa0
 ; RV32IFD-NEXT: beqz a0, .LBB0_2
 ; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT: .LBB0_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: stest_f64i32:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz
 ; RV64IFD-NEXT: lui a1, 524288
 ; RV64IFD-NEXT: addiw a2, a1, -1
 ; RV64IFD-NEXT: bge a0, a2, .LBB0_3
@@ -154,23 +147,16 @@
 ;
 ; RV32IFD-LABEL: utest_f64i32:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: feq.d a0, ft0, ft0
+; RV32IFD-NEXT: feq.d a0, fa0, fa0
 ; RV32IFD-NEXT: beqz a0, .LBB1_2
 ; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT: .LBB1_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: utest_f64i32:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rtz
 ; RV64IFD-NEXT: li a1, -1
 ; RV64IFD-NEXT: srli a1, a1, 32
 ; RV64IFD-NEXT: bltu a0, a1, .LBB1_2
@@ -245,23 +231,16 @@
 ;
 ; RV32IFD-LABEL: ustest_f64i32:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: feq.d a0, ft0, ft0
+; RV32IFD-NEXT: feq.d a0, fa0, fa0
 ; RV32IFD-NEXT: beqz a0, .LBB2_2
 ; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT: .LBB2_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: ustest_f64i32:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz
 ; RV64IFD-NEXT: li a1, -1
 ; RV64IFD-NEXT: srli a1, a1, 32
 ; RV64IFD-NEXT: bge a0, a1, .LBB2_3
@@ -288,18 +267,16 @@
 define i32 @stest_f32i32(float %x) {
 ; RV32-LABEL: stest_f32i32:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: feq.s a0, ft0, ft0
+; RV32-NEXT: feq.s a0, fa0, fa0
 ; RV32-NEXT: beqz a0, .LBB3_2
 ; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT: .LBB3_2: # %entry
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: stest_f32i32:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 524288
 ; RV64-NEXT: addiw a2, a1, -1
 ; RV64-NEXT: bge a0, a2, .LBB3_3
@@ -326,18 +303,16 @@
 define i32 @utest_f32i32(float %x) {
 ; RV32-LABEL: utest_f32i32:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: feq.s a0, ft0, ft0
+; RV32-NEXT: feq.s a0, fa0, fa0
 ; RV32-NEXT: beqz a0, .LBB4_2
 ; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT: .LBB4_2: # %entry
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: utest_f32i32:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT: fcvt.lu.s a0, fa0, rtz
 ; RV64-NEXT: li a1, -1
 ; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: bltu a0, a1, .LBB4_2
@@ -356,18 +331,16 @@
 define i32 @ustest_f32i32(float %x) {
 ; RV32-LABEL: ustest_f32i32:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: feq.s a0, ft0, ft0
+; RV32-NEXT: feq.s a0, fa0, fa0
 ; RV32-NEXT: beqz a0, .LBB5_2
 ; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT: .LBB5_2: # %entry
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: ustest_f32i32:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT: li a1, -1
 ; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: bge a0, a1, .LBB5_3
@@ -398,6 +371,7 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
 ; RV32-NEXT: call __fixsfdi@plt
 ; RV32-NEXT: lui a2, 524288
@@ -436,9 +410,9 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 524288
 ; RV64-NEXT: addiw a2, a1, -1
 ; RV64-NEXT: blt a0, a2, .LBB6_2
@@ -469,6 +443,7 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
 ; RV32-NEXT: call __fixunssfdi@plt
 ; RV32-NEXT: beqz a1, .LBB7_2
@@ -493,9 +468,9 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT: fcvt.lu.s a0, fa0, rtz
 ; RV64-NEXT: li a1, -1
 ; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: bltu a0, a1, .LBB7_2
@@ -520,6 +495,7 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
 ; RV32-NEXT: call __fixsfdi@plt
 ; RV32-NEXT: beqz a1, .LBB8_2
@@ -556,9 +532,9 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT: li a1, -1
 ; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: blt a0, a1, .LBB8_2
@@ -631,12 +607,7 @@
 ;
 ; RV32IFD-LABEL: stest_f64i16:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT: lui a1, 8
 ; RV32IFD-NEXT: addi a1, a1, -1
 ; RV32IFD-NEXT: bge a0, a1, .LBB9_3
@@ -644,7 +615,6 @@
 ; RV32IFD-NEXT: lui a1, 1048568
 ; RV32IFD-NEXT: bge a1, a0, .LBB9_4
 ; RV32IFD-NEXT: .LBB9_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ; RV32IFD-NEXT: .LBB9_3: # %entry
 ; RV32IFD-NEXT: mv a0, a1
@@ -652,13 +622,11 @@
 ; RV32IFD-NEXT: blt a1, a0, .LBB9_2
 ; RV32IFD-NEXT: .LBB9_4: # %entry
 ; RV32IFD-NEXT: lui a0, 1048568
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: stest_f64i16:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.w.d a0, fa0, rtz
 ; RV64IFD-NEXT: lui a1, 8
 ; RV64IFD-NEXT: addiw a1, a1, -1
 ; RV64IFD-NEXT: bge a0, a1, .LBB9_3
@@ -721,25 +689,18 @@
 ;
 ; RV32IFD-LABEL: utest_f64i16:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT: lui a1, 16
 ; RV32IFD-NEXT: addi a1, a1, -1
 ; RV32IFD-NEXT: bltu a0, a1, .LBB10_2
 ; RV32IFD-NEXT: # %bb.1: # %entry
 ; RV32IFD-NEXT: mv a0, a1
 ; RV32IFD-NEXT: .LBB10_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: utest_f64i16:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
 ; RV64IFD-NEXT: lui a1, 16
 ; RV64IFD-NEXT: addiw a1, a1, -1
 ; RV64IFD-NEXT: bltu a0, a1, .LBB10_2
@@ -800,32 +761,24 @@
 ;
 ; RV32IFD-LABEL: ustest_f64i16:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT: lui a1, 16
 ; RV32IFD-NEXT: addi a1, a1, -1
 ; RV32IFD-NEXT: bge a0, a1, .LBB11_3
 ; RV32IFD-NEXT: # %bb.1: # %entry
 ; RV32IFD-NEXT: blez a0, .LBB11_4
 ; RV32IFD-NEXT: .LBB11_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ; RV32IFD-NEXT: .LBB11_3: # %entry
 ; RV32IFD-NEXT: mv a0, a1
 ; RV32IFD-NEXT: bgtz a0, .LBB11_2
 ; RV32IFD-NEXT: .LBB11_4: # %entry
 ; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: ustest_f64i16:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.w.d a0, fa0, rtz
 ; RV64IFD-NEXT: lui a1, 16
 ; RV64IFD-NEXT: addiw a1, a1, -1
 ; RV64IFD-NEXT: bge a0, a1, .LBB11_3
@@ -852,8 +805,7 @@
 define i16 @stest_f32i16(float %x) {
 ; RV32-LABEL: stest_f32i16:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT: lui a1, 8
 ; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: bge a0, a1, .LBB12_3
@@ -872,8 +824,7 @@
 ;
 ; RV64-LABEL: stest_f32i16:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.w.s a0, ft0, rtz
+; RV64-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 8
 ; RV64-NEXT: addiw a1, a1, -1
 ; RV64-NEXT: bge a0, a1, .LBB12_3
@@ -902,8 +853,7 @@
 define i16 @utest_f32i16(float %x) {
 ; RV32-LABEL: utest_f32i16:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT: lui a1, 16
 ; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: bltu a0, a1, .LBB13_2
@@ -914,8 +864,7 @@
 ;
 ; RV64-LABEL: utest_f32i16:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV64-NEXT: fcvt.wu.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 16
 ; RV64-NEXT: addiw a1, a1, -1
 ; RV64-NEXT: bltu a0, a1, .LBB13_2
@@ -934,8 +883,7 @@
 define i16 @ustest_f32i16(float %x) {
 ; RV32-LABEL: ustest_f32i16:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT: lui a1, 16
 ; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: bge a0, a1, .LBB14_3
@@ -952,8 +900,7 @@
 ;
 ; RV64-LABEL: ustest_f32i16:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.w.s a0, ft0, rtz
+; RV64-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 16
 ; RV64-NEXT: addiw a1, a1, -1
 ; RV64-NEXT: bge a0, a1, .LBB14_3
@@ -984,9 +931,9 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT: lui a1, 8
 ; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: blt a0, a1, .LBB15_2
@@ -1008,9 +955,9 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 8
 ; RV64-NEXT: addiw a1, a1, -1
 ; RV64-NEXT: blt a0, a1, .LBB15_2
@@ -1042,9 +989,9 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT: lui a1, 16
 ; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: bltu a0, a1, .LBB16_2
@@ -1061,9 +1008,9 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT: fcvt.lu.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 16
 ; RV64-NEXT: addiw a1, a1, -1
 ; RV64-NEXT: bltu a0, a1, .LBB16_2
@@ -1088,9 +1035,9 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT: lui a1, 16
 ; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: blt a0, a1, .LBB17_2
@@ -1111,9 +1058,9 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 16
 ; RV64-NEXT: addiw a1, a1, -1
 ; RV64-NEXT: blt a0, a1, .LBB17_2
@@ -1140,66 +1087,66 @@
 ; i64 saturate
 
 define i64 @stest_f64i64(double %x) {
-; RV32-LABEL: stest_f64i64:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: call __fixdfti@plt
-; RV32-NEXT: lw a2, 20(sp)
-; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: lui a4, 524288
-; RV32-NEXT: addi a5, a4, -1
-; RV32-NEXT: beq a1, a5, .LBB18_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: sltu a7, a1, a5
-; RV32-NEXT: or a6, a3, a2
-; RV32-NEXT: bnez a6, .LBB18_3
-; RV32-NEXT: j .LBB18_4
-; RV32-NEXT: .LBB18_2:
-; RV32-NEXT: addi a6, a0, 1
-; RV32-NEXT: snez a7, a6
-; RV32-NEXT: or a6, a3, a2
-; RV32-NEXT: beqz a6, .LBB18_4
-; RV32-NEXT: .LBB18_3: # %entry
-; RV32-NEXT: slti a7, a2, 0
-; RV32-NEXT: .LBB18_4: # %entry
-; RV32-NEXT: li a6, -1
-; RV32-NEXT: beqz a7, .LBB18_7
-; RV32-NEXT: # %bb.5: # %entry
-; RV32-NEXT: beq a1, a4, .LBB18_8
-; RV32-NEXT: .LBB18_6: # %entry
-; RV32-NEXT: sltu a4, a4, a1
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: bne a3, a6, .LBB18_9
-; RV32-NEXT: j .LBB18_10
-; RV32-NEXT: .LBB18_7: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: li a3, 0
-; RV32-NEXT: li a0, -1
-; RV32-NEXT: mv a1, a5
-; RV32-NEXT: bne a1, a4, .LBB18_6
-; RV32-NEXT: .LBB18_8:
-; RV32-NEXT: snez a4, a0
-; RV32-NEXT: and a3, a3, a2
-; RV32-NEXT: beq a3, a6, .LBB18_10
-; RV32-NEXT: .LBB18_9: # %entry
-; RV32-NEXT: slt a4, a6, a2
-; RV32-NEXT: .LBB18_10: # %entry
-; RV32-NEXT: bnez a4, .LBB18_12
-; RV32-NEXT: # %bb.11: # %entry
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: .LBB18_12: # %entry
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
+; RV32IF-LABEL: stest_f64i64:
+; RV32IF: # %bb.0: # %entry
+; RV32IF-NEXT: addi sp, sp, -32
+; RV32IF-NEXT: .cfi_def_cfa_offset 32
+; RV32IF-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: .cfi_offset ra, -4
+; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: mv a1, a0
+; RV32IF-NEXT: addi a0, sp, 8
+; RV32IF-NEXT: call __fixdfti@plt
+; RV32IF-NEXT: lw a2, 20(sp)
+; RV32IF-NEXT: lw a3, 16(sp)
+; RV32IF-NEXT: lw a1, 12(sp)
+; RV32IF-NEXT: lw a0, 8(sp)
+; RV32IF-NEXT: lui a4, 524288
+; RV32IF-NEXT: addi a5, a4, -1
+; RV32IF-NEXT: beq a1, a5, .LBB18_2
+; RV32IF-NEXT: # %bb.1: # %entry
+; RV32IF-NEXT: sltu a7, a1, a5
+; RV32IF-NEXT: or a6, a3, a2
+; RV32IF-NEXT: bnez a6, .LBB18_3
+; RV32IF-NEXT: j .LBB18_4
+; RV32IF-NEXT: .LBB18_2:
+; RV32IF-NEXT: addi a6, a0, 1
+; RV32IF-NEXT: snez a7, a6
+; RV32IF-NEXT: or a6, a3, a2
+; RV32IF-NEXT: beqz a6, .LBB18_4
+; RV32IF-NEXT: .LBB18_3: # %entry
+; RV32IF-NEXT: slti a7, a2, 0
+; RV32IF-NEXT: .LBB18_4: # %entry
+; RV32IF-NEXT: li a6, -1
+; RV32IF-NEXT: beqz a7, .LBB18_7
+; RV32IF-NEXT: # %bb.5: # %entry
+; RV32IF-NEXT: beq a1, a4, .LBB18_8
+; RV32IF-NEXT: .LBB18_6: # %entry
+; RV32IF-NEXT: sltu a4, a4, a1
+; RV32IF-NEXT: and a3, a3, a2
+; RV32IF-NEXT: bne a3, a6, .LBB18_9
+; RV32IF-NEXT: j .LBB18_10
+; RV32IF-NEXT: .LBB18_7: # %entry
+; RV32IF-NEXT: li a2, 0
+; RV32IF-NEXT: li a3, 0
+; RV32IF-NEXT: li a0, -1
+; RV32IF-NEXT: mv a1, a5
+; RV32IF-NEXT: bne a1, a4, .LBB18_6
+; RV32IF-NEXT: .LBB18_8:
+; RV32IF-NEXT: snez a4, a0
+; RV32IF-NEXT: and a3, a3, a2
+; RV32IF-NEXT: beq a3, a6, .LBB18_10
+; RV32IF-NEXT: .LBB18_9: # %entry
+; RV32IF-NEXT: slt a4, a6, a2
+; RV32IF-NEXT: .LBB18_10: # %entry
+; RV32IF-NEXT: bnez a4, .LBB18_12
+; RV32IF-NEXT: # %bb.11: # %entry
+; RV32IF-NEXT: li a0, 0
+; RV32IF-NEXT: lui a1, 524288
+; RV32IF-NEXT: .LBB18_12: # %entry
+; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 32
+; RV32IF-NEXT: ret
 ;
 ; RV64IF-LABEL: stest_f64i64:
 ; RV64IF: # %bb.0: # %entry
@@ -1238,13 +1185,71 @@
 ; RV64IF-NEXT: addi sp, sp, 16
 ; RV64IF-NEXT: ret
 ;
+; RV32IFD-LABEL: stest_f64i64:
+; RV32IFD: # %bb.0: # %entry
+; RV32IFD-NEXT: addi sp, sp, -32
+; RV32IFD-NEXT: .cfi_def_cfa_offset 32
+; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: .cfi_offset ra, -4
+; RV32IFD-NEXT: addi a0, sp, 8
+; RV32IFD-NEXT: call __fixdfti@plt
+; RV32IFD-NEXT: lw a2, 20(sp)
+; RV32IFD-NEXT: lw a3, 16(sp)
+; RV32IFD-NEXT: lw a1, 12(sp)
+; RV32IFD-NEXT: lw a0, 8(sp)
+; RV32IFD-NEXT: lui a4, 524288
+; RV32IFD-NEXT: addi a5, a4, -1
+; RV32IFD-NEXT: beq a1, a5, .LBB18_2
+; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: sltu a7, a1, a5
+; RV32IFD-NEXT: or a6, a3, a2
+; RV32IFD-NEXT: bnez a6, .LBB18_3
+; RV32IFD-NEXT: j .LBB18_4
+; RV32IFD-NEXT: .LBB18_2:
+; RV32IFD-NEXT: addi a6, a0, 1
+; RV32IFD-NEXT: snez a7, a6
+; RV32IFD-NEXT: or a6, a3, a2
+; RV32IFD-NEXT: beqz a6, .LBB18_4
+; RV32IFD-NEXT: .LBB18_3: # %entry
+; RV32IFD-NEXT: slti a7, a2, 0
+; RV32IFD-NEXT: .LBB18_4: # %entry
+; RV32IFD-NEXT: li a6, -1
+; RV32IFD-NEXT: beqz a7, .LBB18_7
+; RV32IFD-NEXT: # %bb.5: # %entry
+; RV32IFD-NEXT: beq a1, a4, .LBB18_8
+; RV32IFD-NEXT: .LBB18_6: # %entry
+; RV32IFD-NEXT: sltu a4, a4, a1
+; RV32IFD-NEXT: and a3, a3, a2
+; RV32IFD-NEXT: bne a3, a6, .LBB18_9
+; RV32IFD-NEXT: j .LBB18_10
+; RV32IFD-NEXT: .LBB18_7: # %entry
+; RV32IFD-NEXT: li a2, 0
+; RV32IFD-NEXT: li a3, 0
+; RV32IFD-NEXT: li a0, -1
+; RV32IFD-NEXT: mv a1, a5
+; RV32IFD-NEXT: bne a1, a4, .LBB18_6
+; RV32IFD-NEXT: .LBB18_8:
+; RV32IFD-NEXT: snez a4, a0
+; RV32IFD-NEXT: and a3, a3, a2
+; RV32IFD-NEXT: beq a3, a6, .LBB18_10
+; RV32IFD-NEXT: .LBB18_9: # %entry
+; RV32IFD-NEXT: slt a4, a6, a2
+; RV32IFD-NEXT: .LBB18_10: # %entry
+; RV32IFD-NEXT: bnez a4, .LBB18_12
+; RV32IFD-NEXT: # %bb.11: # %entry
+; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: lui a1, 524288
+; RV32IFD-NEXT: .LBB18_12: # %entry
+; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 32
+; RV32IFD-NEXT: ret
+;
 ; RV64IFD-LABEL: stest_f64i64:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: feq.d a0, ft0, ft0
+; RV64IFD-NEXT: feq.d a0, fa0, fa0
 ; RV64IFD-NEXT: beqz a0, .LBB18_2
 ; RV64IFD-NEXT: # %bb.1:
-; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz
 ; RV64IFD-NEXT: .LBB18_2: # %entry
 ; RV64IFD-NEXT: ret
 entry:
@@ -1258,43 +1263,43 @@
 }
 
 define i64 @utest_f64i64(double %x) {
-; RV32-LABEL: utest_f64i64:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: call __fixunsdfti@plt
-; RV32-NEXT: lw a0, 20(sp)
-; RV32-NEXT: lw a1, 16(sp)
-; RV32-NEXT: beqz a0, .LBB19_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: j .LBB19_3
-; RV32-NEXT: .LBB19_2:
-; RV32-NEXT: seqz a2, a1
-; RV32-NEXT: .LBB19_3: # %entry
-; RV32-NEXT: xori a1, a1, 1
-; RV32-NEXT: or a1, a1, a0
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: beqz a1, .LBB19_5
-; RV32-NEXT: # %bb.4: # %entry
-; RV32-NEXT: mv a0, a2
-; RV32-NEXT: .LBB19_5: # %entry
-; RV32-NEXT: bnez a0, .LBB19_7
-; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: j .LBB19_8
-; RV32-NEXT: .LBB19_7:
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: .LBB19_8: # %entry
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
+; RV32IF-LABEL: utest_f64i64:
+; RV32IF: # %bb.0: # %entry
+; RV32IF-NEXT: addi sp, sp, -32
+; RV32IF-NEXT: .cfi_def_cfa_offset 32
+; RV32IF-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: .cfi_offset ra, -4
+; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: mv a1, a0
+; RV32IF-NEXT: addi a0, sp, 8
+; RV32IF-NEXT: call __fixunsdfti@plt
+; RV32IF-NEXT: lw a0, 20(sp)
+; RV32IF-NEXT: lw a1, 16(sp)
+; RV32IF-NEXT: beqz a0, .LBB19_2
+; RV32IF-NEXT: # %bb.1: # %entry
+; RV32IF-NEXT: li a2, 0
+; RV32IF-NEXT: j .LBB19_3
+; RV32IF-NEXT: .LBB19_2:
+; RV32IF-NEXT: seqz a2, a1
+; RV32IF-NEXT: .LBB19_3: # %entry
+; RV32IF-NEXT: xori a1, a1, 1
+; RV32IF-NEXT: or a1, a1, a0
+; RV32IF-NEXT: li a0, 0
+; RV32IF-NEXT: beqz a1, .LBB19_5
+; RV32IF-NEXT: # %bb.4: # %entry
+; RV32IF-NEXT: mv a0, a2
+; RV32IF-NEXT: .LBB19_5: # %entry
+; RV32IF-NEXT: bnez a0, .LBB19_7
+; RV32IF-NEXT: # %bb.6: # %entry
+; RV32IF-NEXT: li a1, 0
+; RV32IF-NEXT: j .LBB19_8
+; RV32IF-NEXT: .LBB19_7:
+; RV32IF-NEXT: lw a1, 12(sp)
+; RV32IF-NEXT: lw a0, 8(sp)
+; RV32IF-NEXT: .LBB19_8: # %entry
+; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 32
+; RV32IF-NEXT: ret
 ;
 ; RV64-LABEL: utest_f64i64:
 ; RV64: # %bb.0: # %entry
@@ -1310,6 +1315,42 @@
 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
+;
+; RV32IFD-LABEL: utest_f64i64:
+; RV32IFD: # %bb.0: # %entry
+; RV32IFD-NEXT: addi sp, sp, -32
+; RV32IFD-NEXT: .cfi_def_cfa_offset 32
+; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: .cfi_offset ra, -4
+; RV32IFD-NEXT: addi a0, sp, 8
+; RV32IFD-NEXT: call __fixunsdfti@plt
+; RV32IFD-NEXT: lw a0, 20(sp)
+; RV32IFD-NEXT: lw a1, 16(sp)
+; RV32IFD-NEXT: beqz a0, .LBB19_2
+; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: li a2, 0
+; RV32IFD-NEXT: j .LBB19_3
+; RV32IFD-NEXT: .LBB19_2:
+; RV32IFD-NEXT: seqz a2, a1
+; RV32IFD-NEXT: .LBB19_3: # %entry
+; RV32IFD-NEXT: xori a1, a1, 1
+; RV32IFD-NEXT: or a1, a1, a0
+; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: beqz a1, .LBB19_5
+; RV32IFD-NEXT: # %bb.4: # %entry
+; RV32IFD-NEXT: mv a0, a2
+; RV32IFD-NEXT: .LBB19_5: # %entry
+; RV32IFD-NEXT: bnez a0, .LBB19_7
+; RV32IFD-NEXT: # %bb.6: # %entry
+; RV32IFD-NEXT: li a1, 0
+; RV32IFD-NEXT: j .LBB19_8
+; RV32IFD-NEXT: .LBB19_7:
+; RV32IFD-NEXT: lw a1, 12(sp)
+; RV32IFD-NEXT: lw a0, 8(sp)
+; RV32IFD-NEXT: .LBB19_8: # %entry
+; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 32
+; RV32IFD-NEXT: ret
 entry:
 %conv = fptoui double %x to i128
 %0 = icmp ult i128 %conv, 18446744073709551616
@@ -1319,68 +1360,68 @@
 }
 
 define i64 @ustest_f64i64(double %x) {
-; RV32-LABEL: ustest_f64i64:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a2, a1
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: addi a0, sp, 8
-; RV32-NEXT: call __fixdfti@plt
-; RV32-NEXT: lw a2, 20(sp)
-; RV32-NEXT: lw a3, 16(sp)
-; RV32-NEXT: beqz a2, .LBB20_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: slti a0, a2, 0
-; RV32-NEXT: j .LBB20_3
-; RV32-NEXT: .LBB20_2:
-; RV32-NEXT: seqz a0, a3
-; RV32-NEXT: .LBB20_3: # %entry
-; RV32-NEXT: xori a1, a3, 1
-; RV32-NEXT: or a4, a1, a2
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: beqz a4, .LBB20_5
-; RV32-NEXT: # %bb.4: # %entry
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: .LBB20_5: # %entry
-; RV32-NEXT: bnez a1, .LBB20_9
-; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: li a2, 0
-; RV32-NEXT: li a3, 1
-; RV32-NEXT: bnez a2, .LBB20_10
-; RV32-NEXT: .LBB20_7:
-; RV32-NEXT: snez a4, a3
-; RV32-NEXT: bnez a1, .LBB20_11
-; RV32-NEXT: .LBB20_8:
-; RV32-NEXT: snez a5, a0
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: bnez a2, .LBB20_12
-; RV32-NEXT: j .LBB20_13
-; RV32-NEXT: .LBB20_9:
-; RV32-NEXT: lw a1, 12(sp)
-; RV32-NEXT: lw a0, 8(sp)
-; RV32-NEXT: beqz a2, .LBB20_7
-; RV32-NEXT: .LBB20_10: # %entry
-; RV32-NEXT: sgtz a4, a2
-; RV32-NEXT: beqz a1, .LBB20_8
-; RV32-NEXT: .LBB20_11: # %entry
-; RV32-NEXT: snez a5, a1
-; RV32-NEXT: or a2, a3, a2
-; RV32-NEXT: beqz a2, .LBB20_13
-; RV32-NEXT: .LBB20_12: # %entry
-; RV32-NEXT: mv a5, a4
-; RV32-NEXT: .LBB20_13: # %entry
-; RV32-NEXT: bnez a5, .LBB20_15
-; RV32-NEXT: # %bb.14: # %entry
-; RV32-NEXT: li a0, 0
-; RV32-NEXT: li a1, 0
-; RV32-NEXT: .LBB20_15: # %entry
-; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 32
-; RV32-NEXT: ret
+; RV32IF-LABEL: ustest_f64i64:
+; RV32IF: # %bb.0: # %entry
+; RV32IF-NEXT: addi sp, sp, -32
+; RV32IF-NEXT: .cfi_def_cfa_offset 32
+; RV32IF-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IF-NEXT: .cfi_offset ra, -4
+; RV32IF-NEXT: mv a2, a1
+; RV32IF-NEXT: mv a1, a0
+; RV32IF-NEXT: addi a0, sp, 8
+; RV32IF-NEXT: call __fixdfti@plt
+; RV32IF-NEXT: lw a2, 20(sp)
+; RV32IF-NEXT: lw a3, 16(sp)
+; RV32IF-NEXT: beqz a2, .LBB20_2
+; RV32IF-NEXT: # %bb.1: # %entry
+; RV32IF-NEXT: slti a0, a2, 0
+; RV32IF-NEXT: j .LBB20_3
+; RV32IF-NEXT: .LBB20_2:
+; RV32IF-NEXT: seqz a0, a3
+; RV32IF-NEXT: .LBB20_3: # %entry
+; RV32IF-NEXT: xori a1, a3, 1
+; RV32IF-NEXT: or a4, a1, a2
+; RV32IF-NEXT: li a1, 0
+; RV32IF-NEXT: beqz a4, .LBB20_5
+; RV32IF-NEXT: # %bb.4: # %entry
+; RV32IF-NEXT: mv a1, a0
+; RV32IF-NEXT: .LBB20_5: # %entry
+; RV32IF-NEXT: bnez a1, .LBB20_9
+; RV32IF-NEXT: # %bb.6: # %entry
+; RV32IF-NEXT: li a0, 0
+; RV32IF-NEXT: li a2, 0
+; RV32IF-NEXT: li a3, 1
+; RV32IF-NEXT: bnez a2, .LBB20_10
+; RV32IF-NEXT: .LBB20_7:
+; RV32IF-NEXT: snez a4, a3
+; RV32IF-NEXT: bnez a1, .LBB20_11
+; RV32IF-NEXT: .LBB20_8:
+; RV32IF-NEXT: snez a5, a0
+; RV32IF-NEXT: or a2, a3, a2
+; RV32IF-NEXT: bnez a2, .LBB20_12
+; RV32IF-NEXT: j .LBB20_13
+; RV32IF-NEXT: .LBB20_9:
+; RV32IF-NEXT: lw a1, 12(sp)
+; RV32IF-NEXT: lw a0, 8(sp)
+; RV32IF-NEXT: beqz a2, .LBB20_7
+; RV32IF-NEXT: .LBB20_10: # %entry
+; RV32IF-NEXT: sgtz a4, a2
+; RV32IF-NEXT: beqz a1, .LBB20_8
+; RV32IF-NEXT: .LBB20_11: # %entry
+; RV32IF-NEXT: snez a5, a1
+; RV32IF-NEXT: or a2, a3, a2
+; RV32IF-NEXT: beqz a2, .LBB20_13
+; RV32IF-NEXT: .LBB20_12: # %entry
+; RV32IF-NEXT: mv a5, a4
+; RV32IF-NEXT: .LBB20_13: # %entry
+; RV32IF-NEXT: bnez a5, .LBB20_15
+; RV32IF-NEXT: # %bb.14: # %entry
+; RV32IF-NEXT: li a0, 0
+; RV32IF-NEXT: li a1, 0
+; RV32IF-NEXT: .LBB20_15: # %entry
+; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IF-NEXT: addi sp, sp, 32
+; RV32IF-NEXT: ret
 ;
 ; RV64-LABEL: ustest_f64i64:
 ; RV64: # %bb.0: # %entry
@@ -1408,6 +1449,67 @@
 ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
+;
+; RV32IFD-LABEL: ustest_f64i64:
+; RV32IFD: # %bb.0: # %entry
+; RV32IFD-NEXT: addi sp, sp, -32
+; RV32IFD-NEXT: .cfi_def_cfa_offset 32
+; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: .cfi_offset ra, -4
+; RV32IFD-NEXT: addi a0, sp, 8
+; RV32IFD-NEXT: call __fixdfti@plt
+; RV32IFD-NEXT: lw a2, 20(sp)
+; RV32IFD-NEXT: lw a3, 16(sp)
+; RV32IFD-NEXT: beqz a2, .LBB20_2
+; RV32IFD-NEXT: # %bb.1: # %entry
+; RV32IFD-NEXT: slti a0, a2, 0
+; RV32IFD-NEXT: j .LBB20_3
+; RV32IFD-NEXT: .LBB20_2:
+; RV32IFD-NEXT: seqz a0, a3
+; RV32IFD-NEXT: .LBB20_3: # %entry
+; RV32IFD-NEXT: xori a1, a3, 1
+; RV32IFD-NEXT: or a4, a1, a2
+; RV32IFD-NEXT: li a1, 0
+; RV32IFD-NEXT: beqz a4, .LBB20_5
+; RV32IFD-NEXT: # %bb.4: # %entry
+; RV32IFD-NEXT: mv a1, a0
+; RV32IFD-NEXT: .LBB20_5: # %entry
+; RV32IFD-NEXT: bnez a1, .LBB20_9
+; RV32IFD-NEXT: # %bb.6: # %entry
+; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: li a2, 0
+; RV32IFD-NEXT: li a3, 1
+; RV32IFD-NEXT: bnez a2, .LBB20_10
+; RV32IFD-NEXT: .LBB20_7:
+; RV32IFD-NEXT: snez a4, a3
+; RV32IFD-NEXT: bnez a1, .LBB20_11
+; RV32IFD-NEXT: .LBB20_8:
+; RV32IFD-NEXT: snez a5, a0
+; RV32IFD-NEXT: or a2, a3, a2
+; RV32IFD-NEXT: bnez a2, .LBB20_12
+; RV32IFD-NEXT: j .LBB20_13
+; RV32IFD-NEXT: .LBB20_9:
+; RV32IFD-NEXT: lw a1, 12(sp)
+; RV32IFD-NEXT: lw a0, 8(sp)
+; RV32IFD-NEXT: beqz a2, .LBB20_7
+; RV32IFD-NEXT: .LBB20_10: # %entry
+; RV32IFD-NEXT: sgtz a4, a2
+; RV32IFD-NEXT: beqz a1, .LBB20_8
+; RV32IFD-NEXT: .LBB20_11: # %entry
+; RV32IFD-NEXT: snez a5, a1
+; RV32IFD-NEXT: or a2, a3, a2
+; RV32IFD-NEXT: beqz a2, .LBB20_13
+; RV32IFD-NEXT: .LBB20_12: # %entry
+; RV32IFD-NEXT: mv a5, a4
+; RV32IFD-NEXT: .LBB20_13: # %entry
+; RV32IFD-NEXT: bnez a5, .LBB20_15
+; RV32IFD-NEXT: # %bb.14: # %entry
+; RV32IFD-NEXT: li a0, 0
+; RV32IFD-NEXT: li a1, 0
+; RV32IFD-NEXT: .LBB20_15: # %entry
+; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 32
+; RV32IFD-NEXT: ret
 entry:
 %conv = fptosi double %x to i128
 %0 = icmp slt i128 %conv, 18446744073709551616
@@ -1425,7 +1527,6 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 32
 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a1, a0
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti@plt
 ; RV32-NEXT: lw a2, 20(sp)
@@ -1481,11 +1582,10 @@
 ;
 ; RV64-LABEL: stest_f32i64:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: feq.s a0, ft0, ft0
+; RV64-NEXT: feq.s a0, fa0, fa0
 ; RV64-NEXT: beqz a0, .LBB21_2
 ; RV64-NEXT: # %bb.1:
-; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT: .LBB21_2: # %entry
 ; RV64-NEXT: ret
 entry:
@@ -1505,7 +1605,6 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 32
 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a1, a0
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixunssfti@plt
 ; RV32-NEXT: lw a0, 20(sp)
@@ -1565,7 +1664,6 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 32
 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a1, a0
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti@plt
 ; RV32-NEXT: lw a2, 20(sp)
@@ -1664,8 +1762,8 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 32
 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
-; RV32-NEXT: mv a1, a0
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti@plt
 ; RV32-NEXT: lw a2, 20(sp)
@@ -1725,6 +1823,7 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
 ; RV64-NEXT: call __fixsfti@plt
 ; RV64-NEXT: li a2, -1
@@ -1773,8 +1872,8 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 32
 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
-; RV32-NEXT: mv a1, a0
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixunssfti@plt
 ; RV32-NEXT: lw a0, 20(sp)
@@ -1811,6 +1910,7 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
 ; RV64-NEXT: call __fixunssfti@plt
 ; RV64-NEXT: beqz a1, .LBB25_2
@@ -1835,8 +1935,8 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 32
 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
-; RV32-NEXT: mv a1, a0
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: call __fixsfti@plt
 ; RV32-NEXT: lw a2, 20(sp)
@@ -1898,6 +1998,7 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
 ; RV64-NEXT: call __fixsfti@plt
 ; RV64-NEXT: blez a1, .LBB26_2
@@ -2011,23 +2112,16 @@
 ;
 ; RV32IFD-LABEL: stest_f64i32_mm:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: feq.d a0, ft0, ft0
+; RV32IFD-NEXT: feq.d a0, fa0, fa0
 ; RV32IFD-NEXT: beqz a0, .LBB27_2
 ; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT: .LBB27_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: stest_f64i32_mm:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz
 ; RV64IFD-NEXT: lui a1, 524288
 ; RV64IFD-NEXT: addiw a2, a1, -1
 ; RV64IFD-NEXT: bge a0, a2, .LBB27_3
@@ -2084,23 +2178,16 @@
 ;
 ; RV32IFD-LABEL: utest_f64i32_mm:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: feq.d a0, ft0, ft0
+; RV32IFD-NEXT: feq.d a0, fa0, fa0
 ; RV32IFD-NEXT: beqz a0, .LBB28_2
 ; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT: .LBB28_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: utest_f64i32_mm:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.lu.d a0, fa0, rtz
 ; RV64IFD-NEXT: li a1, -1
 ; RV64IFD-NEXT: srli a1, a1, 32
 ; RV64IFD-NEXT: bltu a0, a1, .LBB28_2
@@ -2178,23 +2265,16 @@
 ;
 ; RV32IFD-LABEL: ustest_f64i32_mm:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: feq.d a0, ft0, ft0
+; RV32IFD-NEXT: feq.d a0, fa0, fa0
 ; RV32IFD-NEXT: beqz a0, .LBB29_2
 ; RV32IFD-NEXT: # %bb.1:
-; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT: .LBB29_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: ustest_f64i32_mm:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.l.d a0, fa0, rtz
 ; RV64IFD-NEXT: li a1, -1
 ; RV64IFD-NEXT: srli a1, a1, 32
 ; RV64IFD-NEXT: bge a0, a1, .LBB29_3
@@ -2219,18 +2299,16 @@
 define i32 @stest_f32i32_mm(float %x) {
 ; RV32-LABEL: stest_f32i32_mm:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: feq.s a0, ft0, ft0
+; RV32-NEXT: feq.s a0, fa0, fa0
 ; RV32-NEXT: beqz a0, .LBB30_2
 ; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT: .LBB30_2: # %entry
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: stest_f32i32_mm:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 524288
 ; RV64-NEXT: addiw a2, a1, -1
 ; RV64-NEXT: bge a0, a2, .LBB30_3
@@ -2255,18 +2333,16 @@
 define i32 @utest_f32i32_mm(float %x) {
 ; RV32-LABEL: utest_f32i32_mm:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: feq.s a0, ft0, ft0
+; RV32-NEXT: feq.s a0, fa0, fa0
 ; RV32-NEXT: beqz a0, .LBB31_2
 ; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT: .LBB31_2: # %entry
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: utest_f32i32_mm:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT: fcvt.lu.s a0, fa0, rtz
 ; RV64-NEXT: li a1, -1
 ; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: bltu a0, a1, .LBB31_2
@@ -2284,18 +2360,16 @@
 define i32 @ustest_f32i32_mm(float %x) {
 ; RV32-LABEL: ustest_f32i32_mm:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: feq.s a0, ft0, ft0
+; RV32-NEXT: feq.s a0, fa0, fa0
 ; RV32-NEXT: beqz a0, .LBB32_2
 ; RV32-NEXT: # %bb.1:
-; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT: .LBB32_2: # %entry
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: ustest_f32i32_mm:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT: li a1, -1
 ; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: bge a0, a1, .LBB32_3
@@ -2324,6 +2398,7 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
 ; RV32-NEXT: call __fixsfdi@plt
 ; RV32-NEXT: lui a2, 524288
@@ -2378,9 +2453,9 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 524288
 ; RV64-NEXT: addiw a2, a1, -1
 ; RV64-NEXT: blt a0, a2, .LBB33_2
@@ -2409,6 +2484,7 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
 ; RV32-NEXT: call __fixunssfdi@plt
 ; RV32-NEXT: beqz a1, .LBB34_2
@@ -2425,9 +2501,9 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.lu.s a0, ft0, rtz
+; RV64-NEXT: fcvt.lu.s a0, fa0, rtz
 ; RV64-NEXT: li a1, -1
 ; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: bltu a0, a1, .LBB34_2
@@ -2451,6 +2527,7 @@
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: fmv.x.w a0, fa0
 ; RV32-NEXT: call __extendhfsf2@plt
 ; RV32-NEXT: call __fixsfdi@plt
 ; RV32-NEXT: mv a2, a0
@@ -2491,9 +2568,9 @@
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: fmv.x.w a0, fa0
 ; RV64-NEXT: call __extendhfsf2@plt
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.l.s a0, ft0, rtz
+; RV64-NEXT: fcvt.l.s a0, fa0, rtz
 ; RV64-NEXT: li a1, -1
 ; RV64-NEXT: srli a1, a1, 32
 ; RV64-NEXT: blt a0, a1, .LBB35_2
@@ -2564,12 +2641,7 @@
 ;
 ; RV32IFD-LABEL: stest_f64i16_mm:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT: lui a1, 8
 ; RV32IFD-NEXT: addi a1, a1, -1
 ; RV32IFD-NEXT: bge a0, a1, .LBB36_3
@@ -2577,7 +2649,6 @@
 ; RV32IFD-NEXT: lui a1, 1048568
 ; RV32IFD-NEXT: bge a1, a0, .LBB36_4
 ; RV32IFD-NEXT: .LBB36_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ; RV32IFD-NEXT: .LBB36_3: # %entry
 ; RV32IFD-NEXT: mv a0, a1
@@ -2585,13 +2656,11 @@
 ; RV32IFD-NEXT: blt a1, a0, .LBB36_2
 ; RV32IFD-NEXT: .LBB36_4: # %entry
 ; RV32IFD-NEXT: lui a0, 1048568
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: stest_f64i16_mm:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.w.d a0, fa0, rtz
 ; RV64IFD-NEXT: lui a1, 8
 ; RV64IFD-NEXT: addiw a1, a1, -1
 ; RV64IFD-NEXT: bge a0, a1, .LBB36_3
@@ -2652,25 +2721,18 @@
 ;
 ; RV32IFD-LABEL: utest_f64i16_mm:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.wu.d a0, fa0, rtz
 ; RV32IFD-NEXT: lui a1, 16
 ; RV32IFD-NEXT: addi a1, a1, -1
 ; RV32IFD-NEXT: bltu a0, a1, .LBB37_2
 ; RV32IFD-NEXT: # %bb.1: # %entry
 ; RV32IFD-NEXT: mv a0, a1
 ; RV32IFD-NEXT: .LBB37_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: utest_f64i16_mm:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.wu.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.wu.d a0, fa0, rtz
 ; RV64IFD-NEXT: lui a1, 16
 ; RV64IFD-NEXT: addiw a1, a1, -1
 ; RV64IFD-NEXT: bltu a0, a1, .LBB37_2
@@ -2730,32 +2792,24 @@
 ;
 ; RV32IFD-LABEL: ustest_f64i16_mm:
 ; RV32IFD: # %bb.0: # %entry
-; RV32IFD-NEXT: addi sp, sp, -16
-; RV32IFD-NEXT: .cfi_def_cfa_offset 16
-; RV32IFD-NEXT: sw a0, 8(sp)
-; RV32IFD-NEXT: sw a1, 12(sp)
-; RV32IFD-NEXT: fld ft0, 8(sp)
-; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV32IFD-NEXT: fcvt.w.d a0, fa0, rtz
 ; RV32IFD-NEXT: lui a1, 16
 ; RV32IFD-NEXT: addi a1, a1, -1
 ; RV32IFD-NEXT: bge a0, a1, .LBB38_3
 ; RV32IFD-NEXT: # %bb.1: # %entry
 ; RV32IFD-NEXT: blez a0, .LBB38_4
 ; RV32IFD-NEXT: .LBB38_2: # %entry
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ; RV32IFD-NEXT: .LBB38_3: # %entry
 ; RV32IFD-NEXT: mv a0, a1
 ; RV32IFD-NEXT: bgtz a0, .LBB38_2
 ; RV32IFD-NEXT: .LBB38_4: # %entry
 ; RV32IFD-NEXT: li a0, 0
-; RV32IFD-NEXT: addi sp, sp, 16
 ; RV32IFD-NEXT: ret
 ;
 ; RV64IFD-LABEL: ustest_f64i16_mm:
 ; RV64IFD: # %bb.0: # %entry
-; RV64IFD-NEXT: fmv.d.x ft0, a0
-; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz
+; RV64IFD-NEXT: fcvt.w.d a0, fa0, rtz
 ; RV64IFD-NEXT: lui a1, 16
 ; RV64IFD-NEXT: addiw a1, a1, -1
 ; RV64IFD-NEXT: bge a0, a1, .LBB38_3
@@ -2780,8 +2834,7 @@
 define i16 @stest_f32i16_mm(float %x) {
 ; RV32-LABEL: stest_f32i16_mm:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT: lui a1, 8
 ; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: bge a0, a1, .LBB39_3
@@ -2800,8 +2853,7 @@
 ;
 ; RV64-LABEL: stest_f32i16_mm:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.w.s a0, ft0, rtz
+; RV64-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 8
 ; RV64-NEXT: addiw a1, a1, -1
 ; RV64-NEXT: bge a0, a1, .LBB39_3
@@ -2828,8 +2880,7 @@
 define i16 @utest_f32i16_mm(float %x) {
 ; RV32-LABEL: utest_f32i16_mm:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV32-NEXT: fcvt.wu.s a0, fa0, rtz
 ; RV32-NEXT: lui a1, 16
 ; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: bltu a0, a1, .LBB40_2
@@ -2840,8 +2891,7 @@
 ;
 ; RV64-LABEL: utest_f32i16_mm:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.wu.s a0, ft0, rtz
+; RV64-NEXT: fcvt.wu.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 16
 ; RV64-NEXT: addiw a1, a1, -1
 ; RV64-NEXT: bltu a0, a1, .LBB40_2
@@ -2859,8 +2909,7 @@
 define i16 @ustest_f32i16_mm(float %x) {
 ; RV32-LABEL: ustest_f32i16_mm:
 ; RV32: # %bb.0: # %entry
-; RV32-NEXT: fmv.w.x ft0, a0
-; RV32-NEXT: fcvt.w.s a0, ft0, rtz
+; RV32-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV32-NEXT: lui a1, 16
 ; RV32-NEXT: addi a1, a1, -1
 ; RV32-NEXT: bge a0, a1, .LBB41_3
@@ -2877,8 +2926,7 @@
 ;
 ; RV64-LABEL: ustest_f32i16_mm:
 ; RV64: # %bb.0: # %entry
-; RV64-NEXT: fmv.w.x ft0, a0
-; RV64-NEXT: fcvt.w.s a0, ft0, rtz
+; RV64-NEXT: fcvt.w.s a0, fa0, rtz
 ; RV64-NEXT: lui a1, 16
 ; RV64-NEXT: addiw a1, a1, -1
 ; RV64-NEXT: bge a0, a1, .LBB41_3
@@ -2907,9 +2955,9 @@
 ;
RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: fmv.x.w a0, fa0 ; RV32-NEXT: call __extendhfsf2@plt -; RV32-NEXT: fmv.w.x ft0, a0 -; RV32-NEXT: fcvt.w.s a0, ft0, rtz +; RV32-NEXT: fcvt.w.s a0, fa0, rtz ; RV32-NEXT: lui a1, 8 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: blt a0, a1, .LBB42_2 @@ -2931,9 +2979,9 @@ ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt -; RV64-NEXT: fmv.w.x ft0, a0 -; RV64-NEXT: fcvt.l.s a0, ft0, rtz +; RV64-NEXT: fcvt.l.s a0, fa0, rtz ; RV64-NEXT: lui a1, 8 ; RV64-NEXT: addiw a1, a1, -1 ; RV64-NEXT: blt a0, a1, .LBB42_2 @@ -2963,9 +3011,9 @@ ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: fmv.x.w a0, fa0 ; RV32-NEXT: call __extendhfsf2@plt -; RV32-NEXT: fmv.w.x ft0, a0 -; RV32-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32-NEXT: fcvt.wu.s a0, fa0, rtz ; RV32-NEXT: lui a1, 16 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: bltu a0, a1, .LBB43_2 @@ -2982,9 +3030,9 @@ ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt -; RV64-NEXT: fmv.w.x ft0, a0 -; RV64-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64-NEXT: fcvt.lu.s a0, fa0, rtz ; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: lui a1, 16 ; RV64-NEXT: addiw a1, a1, -1 @@ -3009,9 +3057,9 @@ ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: fmv.x.w a0, fa0 ; RV32-NEXT: call __extendhfsf2@plt -; RV32-NEXT: fmv.w.x ft0, a0 -; RV32-NEXT: fcvt.w.s a0, ft0, rtz +; RV32-NEXT: fcvt.w.s a0, fa0, rtz ; RV32-NEXT: lui a1, 16 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: blt a0, a1, .LBB44_2 @@ -3032,9 +3080,9 @@ ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt -; RV64-NEXT: fmv.w.x ft0, a0 -; RV64-NEXT: fcvt.l.s a0, ft0, rtz +; RV64-NEXT: fcvt.l.s a0, fa0, rtz ; RV64-NEXT: lui a1, 16 ; RV64-NEXT: addiw a1, a1, -1 ; RV64-NEXT: blt a0, a1, .LBB44_2 @@ -3059,123 +3107,123 @@ ; i64 saturate define i64 @stest_f64i64_mm(double %x) { -; RV32-LABEL: stest_f64i64_mm: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: call __fixdfti@plt -; RV32-NEXT: lw a5, 8(sp) -; RV32-NEXT: lw a3, 20(sp) -; RV32-NEXT: lw a1, 12(sp) -; RV32-NEXT: li a2, -1 -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bltz a3, .LBB45_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: li a7, -1 -; RV32-NEXT: .LBB45_2: # %entry -; RV32-NEXT: lui a4, 524288 -; RV32-NEXT: addi a6, a4, -1 -; RV32-NEXT: mv t0, a5 -; RV32-NEXT: bgeu a1, a6, .LBB45_19 -; RV32-NEXT: # %bb.3: # %entry -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: bne a1, a6, .LBB45_20 -; RV32-NEXT: .LBB45_4: # %entry -; RV32-NEXT: or t0, a0, a3 -; RV32-NEXT: bnez t0, .LBB45_21 -; RV32-NEXT: .LBB45_5: # %entry -; RV32-NEXT: mv a7, a1 -; RV32-NEXT: bgez a3, .LBB45_22 -; RV32-NEXT: .LBB45_6: # %entry -; RV32-NEXT: bgeu a1, a6, .LBB45_23 -; RV32-NEXT: .LBB45_7: # %entry -; RV32-NEXT: bnez t0, .LBB45_24 -; RV32-NEXT: .LBB45_8: # 
%entry -; RV32-NEXT: li a6, 0 -; RV32-NEXT: bnez a3, .LBB45_25 -; RV32-NEXT: .LBB45_9: # %entry -; RV32-NEXT: bgez a3, .LBB45_26 -; RV32-NEXT: .LBB45_10: # %entry -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bgeu a4, a1, .LBB45_27 -; RV32-NEXT: .LBB45_11: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bne a1, a4, .LBB45_28 -; RV32-NEXT: .LBB45_12: # %entry -; RV32-NEXT: bltz a3, .LBB45_29 -; RV32-NEXT: .LBB45_13: # %entry -; RV32-NEXT: and a6, a6, a3 -; RV32-NEXT: bne a6, a2, .LBB45_30 -; RV32-NEXT: .LBB45_14: # %entry -; RV32-NEXT: mv a5, a1 -; RV32-NEXT: bltz a3, .LBB45_31 -; RV32-NEXT: .LBB45_15: # %entry -; RV32-NEXT: bgeu a4, a1, .LBB45_32 -; RV32-NEXT: .LBB45_16: # %entry -; RV32-NEXT: beq a6, a2, .LBB45_18 -; RV32-NEXT: .LBB45_17: # %entry -; RV32-NEXT: mv a1, a5 -; RV32-NEXT: .LBB45_18: # %entry -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; RV32-NEXT: .LBB45_19: # %entry -; RV32-NEXT: li t0, -1 -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: beq a1, a6, .LBB45_4 -; RV32-NEXT: .LBB45_20: # %entry -; RV32-NEXT: mv a5, t0 -; RV32-NEXT: or t0, a0, a3 -; RV32-NEXT: beqz t0, .LBB45_5 -; RV32-NEXT: .LBB45_21: # %entry -; RV32-NEXT: mv a5, a7 -; RV32-NEXT: mv a7, a1 -; RV32-NEXT: bltz a3, .LBB45_6 -; RV32-NEXT: .LBB45_22: # %entry -; RV32-NEXT: mv a7, a6 -; RV32-NEXT: bltu a1, a6, .LBB45_7 -; RV32-NEXT: .LBB45_23: # %entry -; RV32-NEXT: mv a1, a6 -; RV32-NEXT: beqz t0, .LBB45_8 -; RV32-NEXT: .LBB45_24: # %entry -; RV32-NEXT: mv a1, a7 -; RV32-NEXT: li a6, 0 -; RV32-NEXT: beqz a3, .LBB45_9 -; RV32-NEXT: .LBB45_25: # %entry -; RV32-NEXT: srai a6, a3, 31 -; RV32-NEXT: and a6, a6, a0 -; RV32-NEXT: bltz a3, .LBB45_10 -; RV32-NEXT: .LBB45_26: # %entry -; RV32-NEXT: li a3, 0 -; RV32-NEXT: mv a7, a5 -; RV32-NEXT: bltu a4, a1, .LBB45_11 -; RV32-NEXT: .LBB45_27: # %entry -; RV32-NEXT: li a7, 0 -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: beq a1, a4, .LBB45_12 -; RV32-NEXT: .LBB45_28: # %entry -; RV32-NEXT: mv a0, a7 -; RV32-NEXT: bgez a3, .LBB45_13 -; RV32-NEXT: .LBB45_29: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: and a6, a6, a3 -; RV32-NEXT: beq a6, a2, .LBB45_14 -; RV32-NEXT: .LBB45_30: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: mv a5, a1 -; RV32-NEXT: bgez a3, .LBB45_15 -; RV32-NEXT: .LBB45_31: # %entry -; RV32-NEXT: lui a5, 524288 -; RV32-NEXT: bltu a4, a1, .LBB45_16 -; RV32-NEXT: .LBB45_32: # %entry -; RV32-NEXT: lui a1, 524288 -; RV32-NEXT: bne a6, a2, .LBB45_17 -; RV32-NEXT: j .LBB45_18 +; RV32IF-LABEL: stest_f64i64_mm: +; RV32IF: # %bb.0: # %entry +; RV32IF-NEXT: addi sp, sp, -32 +; RV32IF-NEXT: .cfi_def_cfa_offset 32 +; RV32IF-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IF-NEXT: .cfi_offset ra, -4 +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a1, a0 +; RV32IF-NEXT: addi a0, sp, 8 +; RV32IF-NEXT: call __fixdfti@plt +; RV32IF-NEXT: lw a5, 8(sp) +; RV32IF-NEXT: lw a3, 20(sp) +; RV32IF-NEXT: lw a1, 12(sp) +; RV32IF-NEXT: li a2, -1 +; RV32IF-NEXT: mv a7, a5 +; RV32IF-NEXT: bltz a3, .LBB45_2 +; RV32IF-NEXT: # %bb.1: # %entry +; RV32IF-NEXT: li a7, -1 +; RV32IF-NEXT: .LBB45_2: # %entry +; RV32IF-NEXT: lui a4, 524288 +; RV32IF-NEXT: addi a6, a4, -1 +; RV32IF-NEXT: mv t0, a5 +; RV32IF-NEXT: bgeu a1, a6, .LBB45_19 +; RV32IF-NEXT: # %bb.3: # %entry +; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: bne a1, a6, .LBB45_20 +; RV32IF-NEXT: .LBB45_4: # %entry +; RV32IF-NEXT: or t0, a0, a3 +; RV32IF-NEXT: bnez t0, .LBB45_21 +; RV32IF-NEXT: .LBB45_5: # %entry +; RV32IF-NEXT: mv a7, a1 +; RV32IF-NEXT: bgez a3, .LBB45_22 +; RV32IF-NEXT: .LBB45_6: # %entry +; 
RV32IF-NEXT: bgeu a1, a6, .LBB45_23 +; RV32IF-NEXT: .LBB45_7: # %entry +; RV32IF-NEXT: bnez t0, .LBB45_24 +; RV32IF-NEXT: .LBB45_8: # %entry +; RV32IF-NEXT: li a6, 0 +; RV32IF-NEXT: bnez a3, .LBB45_25 +; RV32IF-NEXT: .LBB45_9: # %entry +; RV32IF-NEXT: bgez a3, .LBB45_26 +; RV32IF-NEXT: .LBB45_10: # %entry +; RV32IF-NEXT: mv a7, a5 +; RV32IF-NEXT: bgeu a4, a1, .LBB45_27 +; RV32IF-NEXT: .LBB45_11: # %entry +; RV32IF-NEXT: mv a0, a5 +; RV32IF-NEXT: bne a1, a4, .LBB45_28 +; RV32IF-NEXT: .LBB45_12: # %entry +; RV32IF-NEXT: bltz a3, .LBB45_29 +; RV32IF-NEXT: .LBB45_13: # %entry +; RV32IF-NEXT: and a6, a6, a3 +; RV32IF-NEXT: bne a6, a2, .LBB45_30 +; RV32IF-NEXT: .LBB45_14: # %entry +; RV32IF-NEXT: mv a5, a1 +; RV32IF-NEXT: bltz a3, .LBB45_31 +; RV32IF-NEXT: .LBB45_15: # %entry +; RV32IF-NEXT: bgeu a4, a1, .LBB45_32 +; RV32IF-NEXT: .LBB45_16: # %entry +; RV32IF-NEXT: beq a6, a2, .LBB45_18 +; RV32IF-NEXT: .LBB45_17: # %entry +; RV32IF-NEXT: mv a1, a5 +; RV32IF-NEXT: .LBB45_18: # %entry +; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 32 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB45_19: # %entry +; RV32IF-NEXT: li t0, -1 +; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: beq a1, a6, .LBB45_4 +; RV32IF-NEXT: .LBB45_20: # %entry +; RV32IF-NEXT: mv a5, t0 +; RV32IF-NEXT: or t0, a0, a3 +; RV32IF-NEXT: beqz t0, .LBB45_5 +; RV32IF-NEXT: .LBB45_21: # %entry +; RV32IF-NEXT: mv a5, a7 +; RV32IF-NEXT: mv a7, a1 +; RV32IF-NEXT: bltz a3, .LBB45_6 +; RV32IF-NEXT: .LBB45_22: # %entry +; RV32IF-NEXT: mv a7, a6 +; RV32IF-NEXT: bltu a1, a6, .LBB45_7 +; RV32IF-NEXT: .LBB45_23: # %entry +; RV32IF-NEXT: mv a1, a6 +; RV32IF-NEXT: beqz t0, .LBB45_8 +; RV32IF-NEXT: .LBB45_24: # %entry +; RV32IF-NEXT: mv a1, a7 +; RV32IF-NEXT: li a6, 0 +; RV32IF-NEXT: beqz a3, .LBB45_9 +; RV32IF-NEXT: .LBB45_25: # %entry +; RV32IF-NEXT: srai a6, a3, 31 +; RV32IF-NEXT: and a6, a6, a0 +; RV32IF-NEXT: bltz a3, .LBB45_10 +; RV32IF-NEXT: .LBB45_26: # %entry +; RV32IF-NEXT: li a3, 0 +; RV32IF-NEXT: mv a7, a5 +; RV32IF-NEXT: bltu a4, a1, .LBB45_11 +; RV32IF-NEXT: .LBB45_27: # %entry +; RV32IF-NEXT: li a7, 0 +; RV32IF-NEXT: mv a0, a5 +; RV32IF-NEXT: beq a1, a4, .LBB45_12 +; RV32IF-NEXT: .LBB45_28: # %entry +; RV32IF-NEXT: mv a0, a7 +; RV32IF-NEXT: bgez a3, .LBB45_13 +; RV32IF-NEXT: .LBB45_29: # %entry +; RV32IF-NEXT: li a5, 0 +; RV32IF-NEXT: and a6, a6, a3 +; RV32IF-NEXT: beq a6, a2, .LBB45_14 +; RV32IF-NEXT: .LBB45_30: # %entry +; RV32IF-NEXT: mv a0, a5 +; RV32IF-NEXT: mv a5, a1 +; RV32IF-NEXT: bgez a3, .LBB45_15 +; RV32IF-NEXT: .LBB45_31: # %entry +; RV32IF-NEXT: lui a5, 524288 +; RV32IF-NEXT: bltu a4, a1, .LBB45_16 +; RV32IF-NEXT: .LBB45_32: # %entry +; RV32IF-NEXT: lui a1, 524288 +; RV32IF-NEXT: bne a6, a2, .LBB45_17 +; RV32IF-NEXT: j .LBB45_18 ; ; RV64IF-LABEL: stest_f64i64_mm: ; RV64IF: # %bb.0: # %entry @@ -3228,13 +3276,128 @@ ; RV64IF-NEXT: bne a1, a2, .LBB45_8 ; RV64IF-NEXT: j .LBB45_9 ; +; RV32IFD-LABEL: stest_f64i64_mm: +; RV32IFD: # %bb.0: # %entry +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: .cfi_def_cfa_offset 32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: .cfi_offset ra, -4 +; RV32IFD-NEXT: addi a0, sp, 8 +; RV32IFD-NEXT: call __fixdfti@plt +; RV32IFD-NEXT: lw a5, 8(sp) +; RV32IFD-NEXT: lw a3, 20(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: li a2, -1 +; RV32IFD-NEXT: mv a7, a5 +; RV32IFD-NEXT: bltz a3, .LBB45_2 +; RV32IFD-NEXT: # %bb.1: # %entry +; RV32IFD-NEXT: li a7, -1 +; RV32IFD-NEXT: .LBB45_2: # %entry +; RV32IFD-NEXT: lui a4, 524288 +; RV32IFD-NEXT: addi a6, a4, 
-1 +; RV32IFD-NEXT: mv t0, a5 +; RV32IFD-NEXT: bgeu a1, a6, .LBB45_19 +; RV32IFD-NEXT: # %bb.3: # %entry +; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: bne a1, a6, .LBB45_20 +; RV32IFD-NEXT: .LBB45_4: # %entry +; RV32IFD-NEXT: or t0, a0, a3 +; RV32IFD-NEXT: bnez t0, .LBB45_21 +; RV32IFD-NEXT: .LBB45_5: # %entry +; RV32IFD-NEXT: mv a7, a1 +; RV32IFD-NEXT: bgez a3, .LBB45_22 +; RV32IFD-NEXT: .LBB45_6: # %entry +; RV32IFD-NEXT: bgeu a1, a6, .LBB45_23 +; RV32IFD-NEXT: .LBB45_7: # %entry +; RV32IFD-NEXT: bnez t0, .LBB45_24 +; RV32IFD-NEXT: .LBB45_8: # %entry +; RV32IFD-NEXT: li a6, 0 +; RV32IFD-NEXT: bnez a3, .LBB45_25 +; RV32IFD-NEXT: .LBB45_9: # %entry +; RV32IFD-NEXT: bgez a3, .LBB45_26 +; RV32IFD-NEXT: .LBB45_10: # %entry +; RV32IFD-NEXT: mv a7, a5 +; RV32IFD-NEXT: bgeu a4, a1, .LBB45_27 +; RV32IFD-NEXT: .LBB45_11: # %entry +; RV32IFD-NEXT: mv a0, a5 +; RV32IFD-NEXT: bne a1, a4, .LBB45_28 +; RV32IFD-NEXT: .LBB45_12: # %entry +; RV32IFD-NEXT: bltz a3, .LBB45_29 +; RV32IFD-NEXT: .LBB45_13: # %entry +; RV32IFD-NEXT: and a6, a6, a3 +; RV32IFD-NEXT: bne a6, a2, .LBB45_30 +; RV32IFD-NEXT: .LBB45_14: # %entry +; RV32IFD-NEXT: mv a5, a1 +; RV32IFD-NEXT: bltz a3, .LBB45_31 +; RV32IFD-NEXT: .LBB45_15: # %entry +; RV32IFD-NEXT: bgeu a4, a1, .LBB45_32 +; RV32IFD-NEXT: .LBB45_16: # %entry +; RV32IFD-NEXT: beq a6, a2, .LBB45_18 +; RV32IFD-NEXT: .LBB45_17: # %entry +; RV32IFD-NEXT: mv a1, a5 +; RV32IFD-NEXT: .LBB45_18: # %entry +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 32 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB45_19: # %entry +; RV32IFD-NEXT: li t0, -1 +; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: beq a1, a6, .LBB45_4 +; RV32IFD-NEXT: .LBB45_20: # %entry +; RV32IFD-NEXT: mv a5, t0 +; RV32IFD-NEXT: or t0, a0, a3 +; RV32IFD-NEXT: beqz t0, .LBB45_5 +; RV32IFD-NEXT: .LBB45_21: # %entry +; RV32IFD-NEXT: mv a5, a7 +; RV32IFD-NEXT: mv a7, a1 +; RV32IFD-NEXT: bltz a3, .LBB45_6 +; RV32IFD-NEXT: .LBB45_22: # %entry +; RV32IFD-NEXT: mv a7, a6 +; RV32IFD-NEXT: bltu a1, a6, .LBB45_7 +; RV32IFD-NEXT: .LBB45_23: # %entry +; RV32IFD-NEXT: mv a1, a6 +; RV32IFD-NEXT: beqz t0, .LBB45_8 +; RV32IFD-NEXT: .LBB45_24: # %entry +; RV32IFD-NEXT: mv a1, a7 +; RV32IFD-NEXT: li a6, 0 +; RV32IFD-NEXT: beqz a3, .LBB45_9 +; RV32IFD-NEXT: .LBB45_25: # %entry +; RV32IFD-NEXT: srai a6, a3, 31 +; RV32IFD-NEXT: and a6, a6, a0 +; RV32IFD-NEXT: bltz a3, .LBB45_10 +; RV32IFD-NEXT: .LBB45_26: # %entry +; RV32IFD-NEXT: li a3, 0 +; RV32IFD-NEXT: mv a7, a5 +; RV32IFD-NEXT: bltu a4, a1, .LBB45_11 +; RV32IFD-NEXT: .LBB45_27: # %entry +; RV32IFD-NEXT: li a7, 0 +; RV32IFD-NEXT: mv a0, a5 +; RV32IFD-NEXT: beq a1, a4, .LBB45_12 +; RV32IFD-NEXT: .LBB45_28: # %entry +; RV32IFD-NEXT: mv a0, a7 +; RV32IFD-NEXT: bgez a3, .LBB45_13 +; RV32IFD-NEXT: .LBB45_29: # %entry +; RV32IFD-NEXT: li a5, 0 +; RV32IFD-NEXT: and a6, a6, a3 +; RV32IFD-NEXT: beq a6, a2, .LBB45_14 +; RV32IFD-NEXT: .LBB45_30: # %entry +; RV32IFD-NEXT: mv a0, a5 +; RV32IFD-NEXT: mv a5, a1 +; RV32IFD-NEXT: bgez a3, .LBB45_15 +; RV32IFD-NEXT: .LBB45_31: # %entry +; RV32IFD-NEXT: lui a5, 524288 +; RV32IFD-NEXT: bltu a4, a1, .LBB45_16 +; RV32IFD-NEXT: .LBB45_32: # %entry +; RV32IFD-NEXT: lui a1, 524288 +; RV32IFD-NEXT: bne a6, a2, .LBB45_17 +; RV32IFD-NEXT: j .LBB45_18 +; ; RV64IFD-LABEL: stest_f64i64_mm: ; RV64IFD: # %bb.0: # %entry -; RV64IFD-NEXT: fmv.d.x ft0, a0 -; RV64IFD-NEXT: feq.d a0, ft0, ft0 +; RV64IFD-NEXT: feq.d a0, fa0, fa0 ; RV64IFD-NEXT: beqz a0, .LBB45_2 ; RV64IFD-NEXT: # %bb.1: -; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz +; RV64IFD-NEXT: 
fcvt.l.d a0, fa0, rtz ; RV64IFD-NEXT: .LBB45_2: # %entry ; RV64IFD-NEXT: ret entry: @@ -3246,53 +3409,53 @@ } define i64 @utest_f64i64_mm(double %x) { -; RV32-LABEL: utest_f64i64_mm: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: call __fixunsdfti@plt -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a0, .LBB46_3 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: beq a2, a1, .LBB46_4 -; RV32-NEXT: .LBB46_2: -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: j .LBB46_5 -; RV32-NEXT: .LBB46_3: -; RV32-NEXT: seqz a2, a3 -; RV32-NEXT: bne a2, a1, .LBB46_2 -; RV32-NEXT: .LBB46_4: # %entry -; RV32-NEXT: mv a4, a1 -; RV32-NEXT: .LBB46_5: # %entry -; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a0 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: beq a3, a1, .LBB46_7 -; RV32-NEXT: # %bb.6: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: .LBB46_7: # %entry -; RV32-NEXT: bne a2, a1, .LBB46_9 -; RV32-NEXT: # %bb.8: # %entry -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bne a3, a1, .LBB46_10 -; RV32-NEXT: j .LBB46_11 -; RV32-NEXT: .LBB46_9: -; RV32-NEXT: lw a2, 12(sp) -; RV32-NEXT: beq a3, a1, .LBB46_11 -; RV32-NEXT: .LBB46_10: # %entry -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB46_11: # %entry -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret +; RV32IF-LABEL: utest_f64i64_mm: +; RV32IF: # %bb.0: # %entry +; RV32IF-NEXT: addi sp, sp, -32 +; RV32IF-NEXT: .cfi_def_cfa_offset 32 +; RV32IF-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IF-NEXT: .cfi_offset ra, -4 +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a1, a0 +; RV32IF-NEXT: addi a0, sp, 8 +; RV32IF-NEXT: call __fixunsdfti@plt +; RV32IF-NEXT: lw a0, 20(sp) +; RV32IF-NEXT: lw a3, 16(sp) +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: beqz a0, .LBB46_3 +; RV32IF-NEXT: # %bb.1: # %entry +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: beq a2, a1, .LBB46_4 +; RV32IF-NEXT: .LBB46_2: +; RV32IF-NEXT: lw a4, 8(sp) +; RV32IF-NEXT: j .LBB46_5 +; RV32IF-NEXT: .LBB46_3: +; RV32IF-NEXT: seqz a2, a3 +; RV32IF-NEXT: bne a2, a1, .LBB46_2 +; RV32IF-NEXT: .LBB46_4: # %entry +; RV32IF-NEXT: mv a4, a1 +; RV32IF-NEXT: .LBB46_5: # %entry +; RV32IF-NEXT: xori a3, a3, 1 +; RV32IF-NEXT: or a3, a3, a0 +; RV32IF-NEXT: mv a0, a1 +; RV32IF-NEXT: beq a3, a1, .LBB46_7 +; RV32IF-NEXT: # %bb.6: # %entry +; RV32IF-NEXT: mv a0, a4 +; RV32IF-NEXT: .LBB46_7: # %entry +; RV32IF-NEXT: bne a2, a1, .LBB46_9 +; RV32IF-NEXT: # %bb.8: # %entry +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: bne a3, a1, .LBB46_10 +; RV32IF-NEXT: j .LBB46_11 +; RV32IF-NEXT: .LBB46_9: +; RV32IF-NEXT: lw a2, 12(sp) +; RV32IF-NEXT: beq a3, a1, .LBB46_11 +; RV32IF-NEXT: .LBB46_10: # %entry +; RV32IF-NEXT: mv a1, a2 +; RV32IF-NEXT: .LBB46_11: # %entry +; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 32 +; RV32IF-NEXT: ret ; ; RV64-LABEL: utest_f64i64_mm: ; RV64: # %bb.0: # %entry @@ -3315,6 +3478,52 @@ ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret +; +; RV32IFD-LABEL: utest_f64i64_mm: +; RV32IFD: # %bb.0: # %entry +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: .cfi_def_cfa_offset 32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: .cfi_offset ra, -4 +; RV32IFD-NEXT: addi a0, sp, 8 +; RV32IFD-NEXT: call __fixunsdfti@plt +; 
RV32IFD-NEXT: lw a0, 20(sp) +; RV32IFD-NEXT: lw a3, 16(sp) +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: beqz a0, .LBB46_3 +; RV32IFD-NEXT: # %bb.1: # %entry +; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: beq a2, a1, .LBB46_4 +; RV32IFD-NEXT: .LBB46_2: +; RV32IFD-NEXT: lw a4, 8(sp) +; RV32IFD-NEXT: j .LBB46_5 +; RV32IFD-NEXT: .LBB46_3: +; RV32IFD-NEXT: seqz a2, a3 +; RV32IFD-NEXT: bne a2, a1, .LBB46_2 +; RV32IFD-NEXT: .LBB46_4: # %entry +; RV32IFD-NEXT: mv a4, a1 +; RV32IFD-NEXT: .LBB46_5: # %entry +; RV32IFD-NEXT: xori a3, a3, 1 +; RV32IFD-NEXT: or a3, a3, a0 +; RV32IFD-NEXT: mv a0, a1 +; RV32IFD-NEXT: beq a3, a1, .LBB46_7 +; RV32IFD-NEXT: # %bb.6: # %entry +; RV32IFD-NEXT: mv a0, a4 +; RV32IFD-NEXT: .LBB46_7: # %entry +; RV32IFD-NEXT: bne a2, a1, .LBB46_9 +; RV32IFD-NEXT: # %bb.8: # %entry +; RV32IFD-NEXT: mv a2, a1 +; RV32IFD-NEXT: bne a3, a1, .LBB46_10 +; RV32IFD-NEXT: j .LBB46_11 +; RV32IFD-NEXT: .LBB46_9: +; RV32IFD-NEXT: lw a2, 12(sp) +; RV32IFD-NEXT: beq a3, a1, .LBB46_11 +; RV32IFD-NEXT: .LBB46_10: # %entry +; RV32IFD-NEXT: mv a1, a2 +; RV32IFD-NEXT: .LBB46_11: # %entry +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 32 +; RV32IFD-NEXT: ret entry: %conv = fptoui double %x to i128 %spec.store.select = call i128 @llvm.umin.i128(i128 %conv, i128 18446744073709551616) @@ -3323,117 +3532,117 @@ } define i64 @ustest_f64i64_mm(double %x) { -; RV32-LABEL: ustest_f64i64_mm: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: mv a2, a1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: call __fixdfti@plt -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a2, 20(sp) -; RV32-NEXT: li a1, 1 -; RV32-NEXT: mv a4, a0 -; RV32-NEXT: bgez a2, .LBB47_5 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bgeu a0, a1, .LBB47_6 -; RV32-NEXT: .LBB47_2: # %entry -; RV32-NEXT: beqz a2, .LBB47_7 -; RV32-NEXT: .LBB47_3: # %entry -; RV32-NEXT: slti a1, a2, 0 -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: beqz a1, .LBB47_8 -; RV32-NEXT: .LBB47_4: -; RV32-NEXT: lw a5, 8(sp) -; RV32-NEXT: j .LBB47_9 -; RV32-NEXT: .LBB47_5: # %entry -; RV32-NEXT: li a4, 1 -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bltu a0, a1, .LBB47_2 -; RV32-NEXT: .LBB47_6: # %entry -; RV32-NEXT: li a3, 1 -; RV32-NEXT: bnez a2, .LBB47_3 -; RV32-NEXT: .LBB47_7: -; RV32-NEXT: seqz a1, a0 -; RV32-NEXT: bnez a1, .LBB47_4 -; RV32-NEXT: .LBB47_8: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: .LBB47_9: # %entry -; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: or a0, a0, a2 -; RV32-NEXT: li a4, 0 -; RV32-NEXT: beqz a0, .LBB47_11 -; RV32-NEXT: # %bb.10: # %entry -; RV32-NEXT: mv a4, a5 -; RV32-NEXT: .LBB47_11: # %entry -; RV32-NEXT: bnez a1, .LBB47_13 -; RV32-NEXT: # %bb.12: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: li a1, 0 -; RV32-NEXT: bnez a0, .LBB47_14 -; RV32-NEXT: j .LBB47_15 -; RV32-NEXT: .LBB47_13: -; RV32-NEXT: lw a5, 12(sp) -; RV32-NEXT: li a1, 0 -; RV32-NEXT: beqz a0, .LBB47_15 -; RV32-NEXT: .LBB47_14: # %entry -; RV32-NEXT: mv a1, a5 -; RV32-NEXT: .LBB47_15: # %entry -; RV32-NEXT: bgez a2, .LBB47_20 -; RV32-NEXT: # %bb.16: # %entry -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: beqz a1, .LBB47_21 -; RV32-NEXT: .LBB47_17: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: bnez a1, .LBB47_22 -; RV32-NEXT: .LBB47_18: # %entry -; RV32-NEXT: beqz a2, .LBB47_23 -; RV32-NEXT: .LBB47_19: # %entry -; RV32-NEXT: sgtz a5, a2 -; RV32-NEXT: beqz a5, .LBB47_24 -; RV32-NEXT: j .LBB47_25 -; 
RV32-NEXT: .LBB47_20: # %entry -; RV32-NEXT: li a2, 0 -; RV32-NEXT: mv a5, a4 -; RV32-NEXT: bnez a1, .LBB47_17 -; RV32-NEXT: .LBB47_21: # %entry -; RV32-NEXT: li a5, 0 -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: beqz a1, .LBB47_18 -; RV32-NEXT: .LBB47_22: # %entry -; RV32-NEXT: mv a0, a5 -; RV32-NEXT: bnez a2, .LBB47_19 -; RV32-NEXT: .LBB47_23: -; RV32-NEXT: snez a5, a3 -; RV32-NEXT: bnez a5, .LBB47_25 -; RV32-NEXT: .LBB47_24: # %entry -; RV32-NEXT: li a4, 0 -; RV32-NEXT: .LBB47_25: # %entry -; RV32-NEXT: or a2, a3, a2 -; RV32-NEXT: bnez a2, .LBB47_30 -; RV32-NEXT: # %bb.26: # %entry -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: beqz a5, .LBB47_31 -; RV32-NEXT: .LBB47_27: # %entry -; RV32-NEXT: beqz a2, .LBB47_29 -; RV32-NEXT: .LBB47_28: # %entry -; RV32-NEXT: mv a1, a3 -; RV32-NEXT: .LBB47_29: # %entry -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 -; RV32-NEXT: ret -; RV32-NEXT: .LBB47_30: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: mv a3, a1 -; RV32-NEXT: bnez a5, .LBB47_27 -; RV32-NEXT: .LBB47_31: # %entry -; RV32-NEXT: li a3, 0 -; RV32-NEXT: bnez a2, .LBB47_28 -; RV32-NEXT: j .LBB47_29 +; RV32IF-LABEL: ustest_f64i64_mm: +; RV32IF: # %bb.0: # %entry +; RV32IF-NEXT: addi sp, sp, -32 +; RV32IF-NEXT: .cfi_def_cfa_offset 32 +; RV32IF-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IF-NEXT: .cfi_offset ra, -4 +; RV32IF-NEXT: mv a2, a1 +; RV32IF-NEXT: mv a1, a0 +; RV32IF-NEXT: addi a0, sp, 8 +; RV32IF-NEXT: call __fixdfti@plt +; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: lw a2, 20(sp) +; RV32IF-NEXT: li a1, 1 +; RV32IF-NEXT: mv a4, a0 +; RV32IF-NEXT: bgez a2, .LBB47_5 +; RV32IF-NEXT: # %bb.1: # %entry +; RV32IF-NEXT: mv a3, a0 +; RV32IF-NEXT: bgeu a0, a1, .LBB47_6 +; RV32IF-NEXT: .LBB47_2: # %entry +; RV32IF-NEXT: beqz a2, .LBB47_7 +; RV32IF-NEXT: .LBB47_3: # %entry +; RV32IF-NEXT: slti a1, a2, 0 +; RV32IF-NEXT: mv a3, a4 +; RV32IF-NEXT: beqz a1, .LBB47_8 +; RV32IF-NEXT: .LBB47_4: +; RV32IF-NEXT: lw a5, 8(sp) +; RV32IF-NEXT: j .LBB47_9 +; RV32IF-NEXT: .LBB47_5: # %entry +; RV32IF-NEXT: li a4, 1 +; RV32IF-NEXT: mv a3, a0 +; RV32IF-NEXT: bltu a0, a1, .LBB47_2 +; RV32IF-NEXT: .LBB47_6: # %entry +; RV32IF-NEXT: li a3, 1 +; RV32IF-NEXT: bnez a2, .LBB47_3 +; RV32IF-NEXT: .LBB47_7: +; RV32IF-NEXT: seqz a1, a0 +; RV32IF-NEXT: bnez a1, .LBB47_4 +; RV32IF-NEXT: .LBB47_8: # %entry +; RV32IF-NEXT: li a5, 0 +; RV32IF-NEXT: .LBB47_9: # %entry +; RV32IF-NEXT: xori a0, a0, 1 +; RV32IF-NEXT: or a0, a0, a2 +; RV32IF-NEXT: li a4, 0 +; RV32IF-NEXT: beqz a0, .LBB47_11 +; RV32IF-NEXT: # %bb.10: # %entry +; RV32IF-NEXT: mv a4, a5 +; RV32IF-NEXT: .LBB47_11: # %entry +; RV32IF-NEXT: bnez a1, .LBB47_13 +; RV32IF-NEXT: # %bb.12: # %entry +; RV32IF-NEXT: li a5, 0 +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: bnez a0, .LBB47_14 +; RV32IF-NEXT: j .LBB47_15 +; RV32IF-NEXT: .LBB47_13: +; RV32IF-NEXT: lw a5, 12(sp) +; RV32IF-NEXT: li a1, 0 +; RV32IF-NEXT: beqz a0, .LBB47_15 +; RV32IF-NEXT: .LBB47_14: # %entry +; RV32IF-NEXT: mv a1, a5 +; RV32IF-NEXT: .LBB47_15: # %entry +; RV32IF-NEXT: bgez a2, .LBB47_20 +; RV32IF-NEXT: # %bb.16: # %entry +; RV32IF-NEXT: mv a5, a4 +; RV32IF-NEXT: beqz a1, .LBB47_21 +; RV32IF-NEXT: .LBB47_17: # %entry +; RV32IF-NEXT: mv a0, a4 +; RV32IF-NEXT: bnez a1, .LBB47_22 +; RV32IF-NEXT: .LBB47_18: # %entry +; RV32IF-NEXT: beqz a2, .LBB47_23 +; RV32IF-NEXT: .LBB47_19: # %entry +; RV32IF-NEXT: sgtz a5, a2 +; RV32IF-NEXT: beqz a5, .LBB47_24 +; RV32IF-NEXT: j .LBB47_25 +; RV32IF-NEXT: .LBB47_20: # %entry +; RV32IF-NEXT: li a2, 0 +; RV32IF-NEXT: mv a5, a4 +; RV32IF-NEXT: bnez a1, 
.LBB47_17 +; RV32IF-NEXT: .LBB47_21: # %entry +; RV32IF-NEXT: li a5, 0 +; RV32IF-NEXT: mv a0, a4 +; RV32IF-NEXT: beqz a1, .LBB47_18 +; RV32IF-NEXT: .LBB47_22: # %entry +; RV32IF-NEXT: mv a0, a5 +; RV32IF-NEXT: bnez a2, .LBB47_19 +; RV32IF-NEXT: .LBB47_23: +; RV32IF-NEXT: snez a5, a3 +; RV32IF-NEXT: bnez a5, .LBB47_25 +; RV32IF-NEXT: .LBB47_24: # %entry +; RV32IF-NEXT: li a4, 0 +; RV32IF-NEXT: .LBB47_25: # %entry +; RV32IF-NEXT: or a2, a3, a2 +; RV32IF-NEXT: bnez a2, .LBB47_30 +; RV32IF-NEXT: # %bb.26: # %entry +; RV32IF-NEXT: mv a3, a1 +; RV32IF-NEXT: beqz a5, .LBB47_31 +; RV32IF-NEXT: .LBB47_27: # %entry +; RV32IF-NEXT: beqz a2, .LBB47_29 +; RV32IF-NEXT: .LBB47_28: # %entry +; RV32IF-NEXT: mv a1, a3 +; RV32IF-NEXT: .LBB47_29: # %entry +; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 32 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB47_30: # %entry +; RV32IF-NEXT: mv a0, a4 +; RV32IF-NEXT: mv a3, a1 +; RV32IF-NEXT: bnez a5, .LBB47_27 +; RV32IF-NEXT: .LBB47_31: # %entry +; RV32IF-NEXT: li a3, 0 +; RV32IF-NEXT: bnez a2, .LBB47_28 +; RV32IF-NEXT: j .LBB47_29 ; ; RV64-LABEL: ustest_f64i64_mm: ; RV64: # %bb.0: # %entry @@ -3473,6 +3682,116 @@ ; RV64-NEXT: li a1, 0 ; RV64-NEXT: bnez a3, .LBB47_4 ; RV64-NEXT: j .LBB47_5 +; +; RV32IFD-LABEL: ustest_f64i64_mm: +; RV32IFD: # %bb.0: # %entry +; RV32IFD-NEXT: addi sp, sp, -32 +; RV32IFD-NEXT: .cfi_def_cfa_offset 32 +; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: .cfi_offset ra, -4 +; RV32IFD-NEXT: addi a0, sp, 8 +; RV32IFD-NEXT: call __fixdfti@plt +; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: lw a2, 20(sp) +; RV32IFD-NEXT: li a1, 1 +; RV32IFD-NEXT: mv a4, a0 +; RV32IFD-NEXT: bgez a2, .LBB47_5 +; RV32IFD-NEXT: # %bb.1: # %entry +; RV32IFD-NEXT: mv a3, a0 +; RV32IFD-NEXT: bgeu a0, a1, .LBB47_6 +; RV32IFD-NEXT: .LBB47_2: # %entry +; RV32IFD-NEXT: beqz a2, .LBB47_7 +; RV32IFD-NEXT: .LBB47_3: # %entry +; RV32IFD-NEXT: slti a1, a2, 0 +; RV32IFD-NEXT: mv a3, a4 +; RV32IFD-NEXT: beqz a1, .LBB47_8 +; RV32IFD-NEXT: .LBB47_4: +; RV32IFD-NEXT: lw a5, 8(sp) +; RV32IFD-NEXT: j .LBB47_9 +; RV32IFD-NEXT: .LBB47_5: # %entry +; RV32IFD-NEXT: li a4, 1 +; RV32IFD-NEXT: mv a3, a0 +; RV32IFD-NEXT: bltu a0, a1, .LBB47_2 +; RV32IFD-NEXT: .LBB47_6: # %entry +; RV32IFD-NEXT: li a3, 1 +; RV32IFD-NEXT: bnez a2, .LBB47_3 +; RV32IFD-NEXT: .LBB47_7: +; RV32IFD-NEXT: seqz a1, a0 +; RV32IFD-NEXT: bnez a1, .LBB47_4 +; RV32IFD-NEXT: .LBB47_8: # %entry +; RV32IFD-NEXT: li a5, 0 +; RV32IFD-NEXT: .LBB47_9: # %entry +; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: or a0, a0, a2 +; RV32IFD-NEXT: li a4, 0 +; RV32IFD-NEXT: beqz a0, .LBB47_11 +; RV32IFD-NEXT: # %bb.10: # %entry +; RV32IFD-NEXT: mv a4, a5 +; RV32IFD-NEXT: .LBB47_11: # %entry +; RV32IFD-NEXT: bnez a1, .LBB47_13 +; RV32IFD-NEXT: # %bb.12: # %entry +; RV32IFD-NEXT: li a5, 0 +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: bnez a0, .LBB47_14 +; RV32IFD-NEXT: j .LBB47_15 +; RV32IFD-NEXT: .LBB47_13: +; RV32IFD-NEXT: lw a5, 12(sp) +; RV32IFD-NEXT: li a1, 0 +; RV32IFD-NEXT: beqz a0, .LBB47_15 +; RV32IFD-NEXT: .LBB47_14: # %entry +; RV32IFD-NEXT: mv a1, a5 +; RV32IFD-NEXT: .LBB47_15: # %entry +; RV32IFD-NEXT: bgez a2, .LBB47_20 +; RV32IFD-NEXT: # %bb.16: # %entry +; RV32IFD-NEXT: mv a5, a4 +; RV32IFD-NEXT: beqz a1, .LBB47_21 +; RV32IFD-NEXT: .LBB47_17: # %entry +; RV32IFD-NEXT: mv a0, a4 +; RV32IFD-NEXT: bnez a1, .LBB47_22 +; RV32IFD-NEXT: .LBB47_18: # %entry +; RV32IFD-NEXT: beqz a2, .LBB47_23 +; RV32IFD-NEXT: .LBB47_19: # %entry +; RV32IFD-NEXT: sgtz a5, a2 +; RV32IFD-NEXT: beqz a5, 
.LBB47_24 +; RV32IFD-NEXT: j .LBB47_25 +; RV32IFD-NEXT: .LBB47_20: # %entry +; RV32IFD-NEXT: li a2, 0 +; RV32IFD-NEXT: mv a5, a4 +; RV32IFD-NEXT: bnez a1, .LBB47_17 +; RV32IFD-NEXT: .LBB47_21: # %entry +; RV32IFD-NEXT: li a5, 0 +; RV32IFD-NEXT: mv a0, a4 +; RV32IFD-NEXT: beqz a1, .LBB47_18 +; RV32IFD-NEXT: .LBB47_22: # %entry +; RV32IFD-NEXT: mv a0, a5 +; RV32IFD-NEXT: bnez a2, .LBB47_19 +; RV32IFD-NEXT: .LBB47_23: +; RV32IFD-NEXT: snez a5, a3 +; RV32IFD-NEXT: bnez a5, .LBB47_25 +; RV32IFD-NEXT: .LBB47_24: # %entry +; RV32IFD-NEXT: li a4, 0 +; RV32IFD-NEXT: .LBB47_25: # %entry +; RV32IFD-NEXT: or a2, a3, a2 +; RV32IFD-NEXT: bnez a2, .LBB47_30 +; RV32IFD-NEXT: # %bb.26: # %entry +; RV32IFD-NEXT: mv a3, a1 +; RV32IFD-NEXT: beqz a5, .LBB47_31 +; RV32IFD-NEXT: .LBB47_27: # %entry +; RV32IFD-NEXT: beqz a2, .LBB47_29 +; RV32IFD-NEXT: .LBB47_28: # %entry +; RV32IFD-NEXT: mv a1, a3 +; RV32IFD-NEXT: .LBB47_29: # %entry +; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 32 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB47_30: # %entry +; RV32IFD-NEXT: mv a0, a4 +; RV32IFD-NEXT: mv a3, a1 +; RV32IFD-NEXT: bnez a5, .LBB47_27 +; RV32IFD-NEXT: .LBB47_31: # %entry +; RV32IFD-NEXT: li a3, 0 +; RV32IFD-NEXT: bnez a2, .LBB47_28 +; RV32IFD-NEXT: j .LBB47_29 entry: %conv = fptosi double %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 18446744073709551616) @@ -3488,7 +3807,6 @@ ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: mv a1, a0 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a5, 8(sp) @@ -3601,11 +3919,10 @@ ; ; RV64-LABEL: stest_f32i64_mm: ; RV64: # %bb.0: # %entry -; RV64-NEXT: fmv.w.x ft0, a0 -; RV64-NEXT: feq.s a0, ft0, ft0 +; RV64-NEXT: feq.s a0, fa0, fa0 ; RV64-NEXT: beqz a0, .LBB48_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: fcvt.l.s a0, ft0, rtz +; RV64-NEXT: fcvt.l.s a0, fa0, rtz ; RV64-NEXT: .LBB48_2: # %entry ; RV64-NEXT: ret entry: @@ -3623,7 +3940,6 @@ ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: mv a1, a0 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixunssfti@plt ; RV32-NEXT: lw a0, 20(sp) @@ -3699,7 +4015,6 @@ ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: mv a1, a0 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a0, 16(sp) @@ -3857,8 +4172,8 @@ ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: fmv.x.w a0, fa0 ; RV32-NEXT: call __extendhfsf2@plt -; RV32-NEXT: mv a1, a0 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a5, 8(sp) @@ -3975,6 +4290,7 @@ ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt ; RV64-NEXT: call __fixsfti@plt ; RV64-NEXT: li a2, -1 @@ -4035,8 +4351,8 @@ ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: fmv.x.w a0, fa0 ; RV32-NEXT: call __extendhfsf2@plt -; RV32-NEXT: mv a1, a0 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixunssfti@plt ; RV32-NEXT: lw a0, 20(sp) @@ -4083,6 +4399,7 @@ ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, 
-8 +; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt ; RV64-NEXT: call __fixunssfti@plt ; RV64-NEXT: mv a2, a0 @@ -4113,8 +4430,8 @@ ; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: fmv.x.w a0, fa0 ; RV32-NEXT: call __extendhfsf2@plt -; RV32-NEXT: mv a1, a0 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti@plt ; RV32-NEXT: lw a0, 16(sp) @@ -4225,6 +4542,7 @@ ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: fmv.x.w a0, fa0 ; RV64-NEXT: call __extendhfsf2@plt ; RV64-NEXT: call __fixsfti@plt ; RV64-NEXT: mv a2, a0 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -1,17 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+v -verify-machineinstrs < %s \ +; RUN: -target-abi=lp64d | FileCheck %s ; i32 saturate define <2 x i32> @stest_f64i32(<2 x double> %x) { ; CHECK-LABEL: stest_f64i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a1 -; CHECK-NEXT: fmv.d.x ft1, a0 -; CHECK-NEXT: fcvt.l.d a1, ft0, rtz +; CHECK-NEXT: fcvt.l.d a1, fa1, rtz ; CHECK-NEXT: lui a2, 524288 ; CHECK-NEXT: addiw a3, a2, -1 -; CHECK-NEXT: fcvt.l.d a0, ft1, rtz +; CHECK-NEXT: fcvt.l.d a0, fa0, rtz ; CHECK-NEXT: bge a1, a3, .LBB0_5 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: bge a0, a3, .LBB0_6 @@ -46,12 +45,10 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) { ; CHECK-LABEL: utest_f64i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: fmv.d.x ft1, a1 -; CHECK-NEXT: fcvt.lu.d a0, ft0, rtz +; CHECK-NEXT: fcvt.lu.d a0, fa0, rtz ; CHECK-NEXT: li a1, -1 ; CHECK-NEXT: srli a2, a1, 32 -; CHECK-NEXT: fcvt.lu.d a1, ft1, rtz +; CHECK-NEXT: fcvt.lu.d a1, fa1, rtz ; CHECK-NEXT: bgeu a0, a2, .LBB1_3 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: bgeu a1, a2, .LBB1_4 @@ -74,12 +71,10 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a1 -; CHECK-NEXT: fmv.d.x ft1, a0 -; CHECK-NEXT: fcvt.l.d a1, ft0, rtz +; CHECK-NEXT: fcvt.l.d a1, fa1, rtz ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: srli a2, a0, 32 -; CHECK-NEXT: fcvt.l.d a0, ft1, rtz +; CHECK-NEXT: fcvt.l.d a0, fa0, rtz ; CHECK-NEXT: bge a1, a2, .LBB2_5 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: bge a0, a2, .LBB2_6 @@ -114,61 +109,59 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft1, a4 -; CHECK-NEXT: fmv.w.x ft2, a3 -; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: lui a4, 524288 -; CHECK-NEXT: addiw a6, a4, -1 -; CHECK-NEXT: fcvt.l.s a3, ft2, rtz -; CHECK-NEXT: blt a2, a6, .LBB3_2 +; CHECK-NEXT: fcvt.l.s a1, fa3, rtz +; CHECK-NEXT: lui a3, 524288 +; CHECK-NEXT: addiw a6, a3, -1 +; CHECK-NEXT: fcvt.l.s a2, fa2, rtz +; CHECK-NEXT: bge a1, a6, .LBB3_10 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a2, a6 +; CHECK-NEXT: fcvt.l.s a4, fa1, rtz +; CHECK-NEXT: bge a2, a6, .LBB3_11 ; CHECK-NEXT: .LBB3_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, a1 -; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: bge a3, a6, .LBB3_11 -; CHECK-NEXT: # %bb.3: # %entry -; 
CHECK-NEXT: fcvt.l.s a5, ft1, rtz -; CHECK-NEXT: bge a1, a6, .LBB3_12 -; CHECK-NEXT: .LBB3_4: # %entry +; CHECK-NEXT: fcvt.l.s a5, fa0, rtz +; CHECK-NEXT: bge a4, a6, .LBB3_12 +; CHECK-NEXT: .LBB3_3: # %entry ; CHECK-NEXT: bge a5, a6, .LBB3_13 +; CHECK-NEXT: .LBB3_4: # %entry +; CHECK-NEXT: bge a3, a5, .LBB3_14 ; CHECK-NEXT: .LBB3_5: # %entry -; CHECK-NEXT: bge a4, a5, .LBB3_14 +; CHECK-NEXT: bge a3, a4, .LBB3_15 ; CHECK-NEXT: .LBB3_6: # %entry -; CHECK-NEXT: bge a4, a1, .LBB3_15 +; CHECK-NEXT: bge a3, a2, .LBB3_16 ; CHECK-NEXT: .LBB3_7: # %entry -; CHECK-NEXT: bge a4, a3, .LBB3_16 +; CHECK-NEXT: blt a3, a1, .LBB3_9 ; CHECK-NEXT: .LBB3_8: # %entry -; CHECK-NEXT: blt a4, a2, .LBB3_10 +; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: .LBB3_9: # %entry -; CHECK-NEXT: lui a2, 524288 -; CHECK-NEXT: .LBB3_10: # %entry -; CHECK-NEXT: sw a2, 12(a0) -; CHECK-NEXT: sw a3, 8(a0) -; CHECK-NEXT: sw a1, 4(a0) +; CHECK-NEXT: sw a1, 12(a0) +; CHECK-NEXT: sw a2, 8(a0) +; CHECK-NEXT: sw a4, 4(a0) ; CHECK-NEXT: sw a5, 0(a0) ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_10: # %entry +; CHECK-NEXT: mv a1, a6 +; CHECK-NEXT: fcvt.l.s a4, fa1, rtz +; CHECK-NEXT: blt a2, a6, .LBB3_2 ; CHECK-NEXT: .LBB3_11: # %entry -; CHECK-NEXT: mv a3, a6 -; CHECK-NEXT: fcvt.l.s a5, ft1, rtz -; CHECK-NEXT: blt a1, a6, .LBB3_4 +; CHECK-NEXT: mv a2, a6 +; CHECK-NEXT: fcvt.l.s a5, fa0, rtz +; CHECK-NEXT: blt a4, a6, .LBB3_3 ; CHECK-NEXT: .LBB3_12: # %entry -; CHECK-NEXT: mv a1, a6 -; CHECK-NEXT: blt a5, a6, .LBB3_5 +; CHECK-NEXT: mv a4, a6 +; CHECK-NEXT: blt a5, a6, .LBB3_4 ; CHECK-NEXT: .LBB3_13: # %entry ; CHECK-NEXT: mv a5, a6 -; CHECK-NEXT: blt a4, a5, .LBB3_6 +; CHECK-NEXT: blt a3, a5, .LBB3_5 ; CHECK-NEXT: .LBB3_14: # %entry ; CHECK-NEXT: lui a5, 524288 -; CHECK-NEXT: blt a4, a1, .LBB3_7 +; CHECK-NEXT: blt a3, a4, .LBB3_6 ; CHECK-NEXT: .LBB3_15: # %entry -; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: blt a4, a3, .LBB3_8 +; CHECK-NEXT: lui a4, 524288 +; CHECK-NEXT: blt a3, a2, .LBB3_7 ; CHECK-NEXT: .LBB3_16: # %entry -; CHECK-NEXT: lui a3, 524288 -; CHECK-NEXT: bge a4, a2, .LBB3_9 -; CHECK-NEXT: j .LBB3_10 +; CHECK-NEXT: lui a2, 524288 +; CHECK-NEXT: bge a3, a1, .LBB3_8 +; CHECK-NEXT: j .LBB3_9 entry: %conv = fptosi <4 x float> %x to <4 x i64> %0 = icmp slt <4 x i64> %conv, @@ -182,41 +175,39 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-LABEL: utest_f32i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft1, a1 -; CHECK-NEXT: fmv.w.x ft2, a2 -; CHECK-NEXT: fmv.w.x ft0, a3 -; CHECK-NEXT: fcvt.lu.s a1, ft1, rtz +; CHECK-NEXT: fcvt.lu.s a1, fa0, rtz ; CHECK-NEXT: li a2, -1 ; CHECK-NEXT: srli a3, a2, 32 -; CHECK-NEXT: fcvt.lu.s a2, ft2, rtz -; CHECK-NEXT: bltu a1, a3, .LBB4_2 +; CHECK-NEXT: fcvt.lu.s a2, fa1, rtz +; CHECK-NEXT: bgeu a1, a3, .LBB4_6 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB4_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, a4 -; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz +; CHECK-NEXT: fcvt.lu.s a4, fa2, rtz ; CHECK-NEXT: bgeu a2, a3, .LBB4_7 -; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz +; CHECK-NEXT: .LBB4_2: # %entry +; CHECK-NEXT: fcvt.lu.s a5, fa3, rtz ; CHECK-NEXT: bgeu a4, a3, .LBB4_8 +; CHECK-NEXT: .LBB4_3: # %entry +; CHECK-NEXT: bltu a5, a3, .LBB4_5 ; CHECK-NEXT: .LBB4_4: # %entry -; CHECK-NEXT: bltu a5, a3, .LBB4_6 -; CHECK-NEXT: .LBB4_5: # %entry ; CHECK-NEXT: mv a5, a3 -; CHECK-NEXT: .LBB4_6: # %entry +; CHECK-NEXT: .LBB4_5: # %entry ; CHECK-NEXT: sw a5, 12(a0) ; CHECK-NEXT: sw a4, 8(a0) ; CHECK-NEXT: sw a2, 4(a0) ; CHECK-NEXT: sw a1, 0(a0) ; CHECK-NEXT: ret 
+; CHECK-NEXT: .LBB4_6: # %entry +; CHECK-NEXT: mv a1, a3 +; CHECK-NEXT: fcvt.lu.s a4, fa2, rtz +; CHECK-NEXT: bltu a2, a3, .LBB4_2 ; CHECK-NEXT: .LBB4_7: # %entry ; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz -; CHECK-NEXT: bltu a4, a3, .LBB4_4 +; CHECK-NEXT: fcvt.lu.s a5, fa3, rtz +; CHECK-NEXT: bltu a4, a3, .LBB4_3 ; CHECK-NEXT: .LBB4_8: # %entry ; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: bgeu a5, a3, .LBB4_5 -; CHECK-NEXT: j .LBB4_6 +; CHECK-NEXT: bgeu a5, a3, .LBB4_4 +; CHECK-NEXT: j .LBB4_5 entry: %conv = fptoui <4 x float> %x to <4 x i64> %0 = icmp ult <4 x i64> %conv, @@ -228,61 +219,59 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft1, a4 -; CHECK-NEXT: fmv.w.x ft2, a3 -; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: li a3, -1 -; CHECK-NEXT: srli a5, a3, 32 -; CHECK-NEXT: fcvt.l.s a3, ft2, rtz -; CHECK-NEXT: blt a2, a5, .LBB5_2 +; CHECK-NEXT: fcvt.l.s a1, fa3, rtz +; CHECK-NEXT: li a2, -1 +; CHECK-NEXT: srli a5, a2, 32 +; CHECK-NEXT: fcvt.l.s a2, fa2, rtz +; CHECK-NEXT: bge a1, a5, .LBB5_10 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a2, a5 +; CHECK-NEXT: fcvt.l.s a3, fa1, rtz +; CHECK-NEXT: bge a2, a5, .LBB5_11 ; CHECK-NEXT: .LBB5_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, a1 -; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: bge a3, a5, .LBB5_11 -; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: bge a1, a5, .LBB5_12 -; CHECK-NEXT: .LBB5_4: # %entry +; CHECK-NEXT: fcvt.l.s a4, fa0, rtz +; CHECK-NEXT: bge a3, a5, .LBB5_12 +; CHECK-NEXT: .LBB5_3: # %entry ; CHECK-NEXT: bge a4, a5, .LBB5_13 -; CHECK-NEXT: .LBB5_5: # %entry +; CHECK-NEXT: .LBB5_4: # %entry ; CHECK-NEXT: blez a4, .LBB5_14 +; CHECK-NEXT: .LBB5_5: # %entry +; CHECK-NEXT: blez a3, .LBB5_15 ; CHECK-NEXT: .LBB5_6: # %entry -; CHECK-NEXT: blez a1, .LBB5_15 +; CHECK-NEXT: blez a2, .LBB5_16 ; CHECK-NEXT: .LBB5_7: # %entry -; CHECK-NEXT: blez a3, .LBB5_16 +; CHECK-NEXT: bgtz a1, .LBB5_9 ; CHECK-NEXT: .LBB5_8: # %entry -; CHECK-NEXT: bgtz a2, .LBB5_10 +; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: .LBB5_9: # %entry -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: .LBB5_10: # %entry -; CHECK-NEXT: sw a2, 12(a0) -; CHECK-NEXT: sw a3, 8(a0) -; CHECK-NEXT: sw a1, 4(a0) +; CHECK-NEXT: sw a1, 12(a0) +; CHECK-NEXT: sw a2, 8(a0) +; CHECK-NEXT: sw a3, 4(a0) ; CHECK-NEXT: sw a4, 0(a0) ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_10: # %entry +; CHECK-NEXT: mv a1, a5 +; CHECK-NEXT: fcvt.l.s a3, fa1, rtz +; CHECK-NEXT: blt a2, a5, .LBB5_2 ; CHECK-NEXT: .LBB5_11: # %entry -; CHECK-NEXT: mv a3, a5 -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a1, a5, .LBB5_4 +; CHECK-NEXT: mv a2, a5 +; CHECK-NEXT: fcvt.l.s a4, fa0, rtz +; CHECK-NEXT: blt a3, a5, .LBB5_3 ; CHECK-NEXT: .LBB5_12: # %entry -; CHECK-NEXT: mv a1, a5 -; CHECK-NEXT: blt a4, a5, .LBB5_5 +; CHECK-NEXT: mv a3, a5 +; CHECK-NEXT: blt a4, a5, .LBB5_4 ; CHECK-NEXT: .LBB5_13: # %entry ; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: bgtz a4, .LBB5_6 +; CHECK-NEXT: bgtz a4, .LBB5_5 ; CHECK-NEXT: .LBB5_14: # %entry ; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: bgtz a1, .LBB5_7 +; CHECK-NEXT: bgtz a3, .LBB5_6 ; CHECK-NEXT: .LBB5_15: # %entry -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz a3, .LBB5_8 -; CHECK-NEXT: .LBB5_16: # %entry ; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: blez a2, .LBB5_9 -; CHECK-NEXT: j .LBB5_10 +; CHECK-NEXT: bgtz a2, .LBB5_7 +; CHECK-NEXT: .LBB5_16: # %entry +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: blez a1, .LBB5_8 +; CHECK-NEXT: j .LBB5_9 
entry: %conv = fptosi <4 x float> %x to <4 x i64> %0 = icmp slt <4 x i64> %conv, @@ -303,63 +292,59 @@ ; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 ; CHECK-NEXT: .cfi_offset s3, -40 -; CHECK-NEXT: .cfi_offset s4, -48 -; CHECK-NEXT: lhu s2, 24(a1) -; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: .cfi_offset fs0, -48 +; CHECK-NEXT: .cfi_offset fs1, -56 +; CHECK-NEXT: .cfi_offset fs2, -64 +; CHECK-NEXT: lhu s1, 24(a1) +; CHECK-NEXT: lhu s2, 0(a1) ; CHECK-NEXT: lhu s3, 8(a1) ; CHECK-NEXT: lhu a1, 16(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: fmv.s fs2, fa0 ; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s3, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: fmv.w.x ft0, s3 -; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s4 -; CHECK-NEXT: fcvt.l.s s3, ft0, rtz +; CHECK-NEXT: fmv.s fs1, fa0 ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK-NEXT: fmv.s fs0, fa0 +; CHECK-NEXT: fcvt.l.s s2, fs2, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __extendhfsf2@plt +; CHECK-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: addiw a4, a1, -1 -; CHECK-NEXT: blt a0, a4, .LBB6_2 +; CHECK-NEXT: bge a0, a4, .LBB6_10 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: fcvt.l.s a2, fs1, rtz +; CHECK-NEXT: bge s2, a4, .LBB6_11 ; CHECK-NEXT: .LBB6_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload -; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: bge s3, a4, .LBB6_11 -; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.l.s a3, ft0, rtz +; CHECK-NEXT: fcvt.l.s a3, fs0, rtz ; CHECK-NEXT: bge a2, a4, .LBB6_12 -; CHECK-NEXT: .LBB6_4: # %entry +; CHECK-NEXT: .LBB6_3: # %entry ; CHECK-NEXT: bge a3, a4, .LBB6_13 -; CHECK-NEXT: .LBB6_5: # %entry +; CHECK-NEXT: .LBB6_4: # %entry ; CHECK-NEXT: bge a1, a3, .LBB6_14 -; CHECK-NEXT: .LBB6_6: # %entry +; CHECK-NEXT: .LBB6_5: # %entry ; CHECK-NEXT: bge a1, a2, .LBB6_15 +; CHECK-NEXT: .LBB6_6: # %entry +; CHECK-NEXT: bge a1, s2, .LBB6_16 ; CHECK-NEXT: .LBB6_7: # %entry -; CHECK-NEXT: bge a1, s3, .LBB6_16 +; CHECK-NEXT: blt a1, a0, .LBB6_9 ; CHECK-NEXT: .LBB6_8: # %entry -; CHECK-NEXT: blt a1, a0, .LBB6_10 -; CHECK-NEXT: .LBB6_9: # %entry ; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: .LBB6_10: # %entry +; CHECK-NEXT: .LBB6_9: # %entry ; CHECK-NEXT: sw a0, 12(s0) -; CHECK-NEXT: sw s3, 8(s0) +; CHECK-NEXT: sw s2, 8(s0) ; CHECK-NEXT: sw a2, 4(s0) ; CHECK-NEXT: sw a3, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload @@ -367,29 +352,35 @@ ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs2, 
0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_10: # %entry +; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: fcvt.l.s a2, fs1, rtz +; CHECK-NEXT: blt s2, a4, .LBB6_2 ; CHECK-NEXT: .LBB6_11: # %entry -; CHECK-NEXT: mv s3, a4 -; CHECK-NEXT: fcvt.l.s a3, ft0, rtz -; CHECK-NEXT: blt a2, a4, .LBB6_4 +; CHECK-NEXT: mv s2, a4 +; CHECK-NEXT: fcvt.l.s a3, fs0, rtz +; CHECK-NEXT: blt a2, a4, .LBB6_3 ; CHECK-NEXT: .LBB6_12: # %entry ; CHECK-NEXT: mv a2, a4 -; CHECK-NEXT: blt a3, a4, .LBB6_5 +; CHECK-NEXT: blt a3, a4, .LBB6_4 ; CHECK-NEXT: .LBB6_13: # %entry ; CHECK-NEXT: mv a3, a4 -; CHECK-NEXT: blt a1, a3, .LBB6_6 +; CHECK-NEXT: blt a1, a3, .LBB6_5 ; CHECK-NEXT: .LBB6_14: # %entry ; CHECK-NEXT: lui a3, 524288 -; CHECK-NEXT: blt a1, a2, .LBB6_7 +; CHECK-NEXT: blt a1, a2, .LBB6_6 ; CHECK-NEXT: .LBB6_15: # %entry ; CHECK-NEXT: lui a2, 524288 -; CHECK-NEXT: blt a1, s3, .LBB6_8 +; CHECK-NEXT: blt a1, s2, .LBB6_7 ; CHECK-NEXT: .LBB6_16: # %entry -; CHECK-NEXT: lui s3, 524288 -; CHECK-NEXT: bge a1, a0, .LBB6_9 -; CHECK-NEXT: j .LBB6_10 +; CHECK-NEXT: lui s2, 524288 +; CHECK-NEXT: bge a1, a0, .LBB6_8 +; CHECK-NEXT: j .LBB6_9 entry: %conv = fptosi <4 x half> %x to <4 x i64> %0 = icmp slt <4 x i64> %conv, @@ -410,73 +401,75 @@ ; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 ; CHECK-NEXT: .cfi_offset s3, -40 -; CHECK-NEXT: .cfi_offset s4, -48 -; CHECK-NEXT: lhu s2, 0(a1) -; CHECK-NEXT: lhu s1, 24(a1) +; CHECK-NEXT: .cfi_offset fs0, -48 +; CHECK-NEXT: .cfi_offset fs1, -56 +; CHECK-NEXT: .cfi_offset fs2, -64 +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s2, 24(a1) ; CHECK-NEXT: lhu s3, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: fmv.s fs2, fa0 ; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s3, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: fmv.w.x ft0, s3 -; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s4 -; CHECK-NEXT: fcvt.lu.s s3, ft0, rtz +; CHECK-NEXT: fmv.s fs1, fa0 ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK-NEXT: fmv.s fs0, fa0 +; CHECK-NEXT: fcvt.lu.s s2, fs2, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __extendhfsf2@plt +; CHECK-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-NEXT: li a1, -1 ; CHECK-NEXT: srli a1, a1, 32 -; CHECK-NEXT: bltu a0, a1, .LBB7_2 +; CHECK-NEXT: bgeu a0, a1, .LBB7_6 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: fcvt.lu.s a2, fs1, rtz +; CHECK-NEXT: bgeu s2, a1, .LBB7_7 ; CHECK-NEXT: .LBB7_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload -; CHECK-NEXT: fcvt.lu.s a2, ft1, rtz -; CHECK-NEXT: bgeu s3, a1, .LBB7_7 -; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz +; CHECK-NEXT: fcvt.lu.s a3, fs0, rtz ; CHECK-NEXT: bgeu a2, a1, .LBB7_8 +; CHECK-NEXT: .LBB7_3: # %entry +; CHECK-NEXT: bltu 
a3, a1, .LBB7_5 ; CHECK-NEXT: .LBB7_4: # %entry -; CHECK-NEXT: bltu a3, a1, .LBB7_6 -; CHECK-NEXT: .LBB7_5: # %entry ; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: .LBB7_6: # %entry +; CHECK-NEXT: .LBB7_5: # %entry ; CHECK-NEXT: sw a3, 12(s0) ; CHECK-NEXT: sw a2, 8(s0) -; CHECK-NEXT: sw s3, 4(s0) +; CHECK-NEXT: sw s2, 4(s0) ; CHECK-NEXT: sw a0, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB7_6: # %entry +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: fcvt.lu.s a2, fs1, rtz +; CHECK-NEXT: bltu s2, a1, .LBB7_2 ; CHECK-NEXT: .LBB7_7: # %entry -; CHECK-NEXT: mv s3, a1 -; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz -; CHECK-NEXT: bltu a2, a1, .LBB7_4 +; CHECK-NEXT: mv s2, a1 +; CHECK-NEXT: fcvt.lu.s a3, fs0, rtz +; CHECK-NEXT: bltu a2, a1, .LBB7_3 ; CHECK-NEXT: .LBB7_8: # %entry ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: bgeu a3, a1, .LBB7_5 -; CHECK-NEXT: j .LBB7_6 +; CHECK-NEXT: bgeu a3, a1, .LBB7_4 +; CHECK-NEXT: j .LBB7_5 entry: %conv = fptoui <4 x half> %x to <4 x i64> %0 = icmp ult <4 x i64> %conv, @@ -495,63 +488,59 @@ ; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 ; CHECK-NEXT: .cfi_offset s3, -40 -; CHECK-NEXT: .cfi_offset s4, -48 -; CHECK-NEXT: lhu s2, 24(a1) -; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: .cfi_offset fs0, -48 +; CHECK-NEXT: .cfi_offset fs1, -56 +; CHECK-NEXT: .cfi_offset fs2, -64 +; CHECK-NEXT: lhu s1, 24(a1) +; CHECK-NEXT: lhu s2, 0(a1) ; CHECK-NEXT: lhu s3, 8(a1) ; CHECK-NEXT: lhu a1, 16(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: fmv.s fs2, fa0 ; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s3, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: fmv.w.x ft0, s3 -; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s4 -; CHECK-NEXT: fcvt.l.s s3, ft0, rtz +; CHECK-NEXT: fmv.s fs1, fa0 ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK-NEXT: fmv.s fs0, fa0 +; CHECK-NEXT: fcvt.l.s s2, fs2, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __extendhfsf2@plt +; CHECK-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NEXT: li a1, -1 ; CHECK-NEXT: srli a3, a1, 32 -; CHECK-NEXT: blt a0, a3, .LBB8_2 +; CHECK-NEXT: bge a0, a3, .LBB8_10 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, a3 +; CHECK-NEXT: fcvt.l.s a1, fs1, rtz +; CHECK-NEXT: bge s2, a3, .LBB8_11 ; CHECK-NEXT: .LBB8_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload -; CHECK-NEXT: fcvt.l.s a1, ft1, rtz -; 
CHECK-NEXT: bge s3, a3, .LBB8_11
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: fcvt.l.s a2, ft0, rtz
+; CHECK-NEXT: fcvt.l.s a2, fs0, rtz
 ; CHECK-NEXT: bge a1, a3, .LBB8_12
-; CHECK-NEXT: .LBB8_4: # %entry
+; CHECK-NEXT: .LBB8_3: # %entry
 ; CHECK-NEXT: bge a2, a3, .LBB8_13
-; CHECK-NEXT: .LBB8_5: # %entry
+; CHECK-NEXT: .LBB8_4: # %entry
 ; CHECK-NEXT: blez a2, .LBB8_14
-; CHECK-NEXT: .LBB8_6: # %entry
+; CHECK-NEXT: .LBB8_5: # %entry
 ; CHECK-NEXT: blez a1, .LBB8_15
+; CHECK-NEXT: .LBB8_6: # %entry
+; CHECK-NEXT: blez s2, .LBB8_16
 ; CHECK-NEXT: .LBB8_7: # %entry
-; CHECK-NEXT: blez s3, .LBB8_16
+; CHECK-NEXT: bgtz a0, .LBB8_9
 ; CHECK-NEXT: .LBB8_8: # %entry
-; CHECK-NEXT: bgtz a0, .LBB8_10
-; CHECK-NEXT: .LBB8_9: # %entry
 ; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: .LBB8_10: # %entry
+; CHECK-NEXT: .LBB8_9: # %entry
 ; CHECK-NEXT: sw a0, 12(s0)
-; CHECK-NEXT: sw s3, 8(s0)
+; CHECK-NEXT: sw s2, 8(s0)
 ; CHECK-NEXT: sw a1, 4(s0)
 ; CHECK-NEXT: sw a2, 0(s0)
 ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
@@ -559,29 +548,35 @@
 ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: addi sp, sp, 64
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB8_10: # %entry
+; CHECK-NEXT: mv a0, a3
+; CHECK-NEXT: fcvt.l.s a1, fs1, rtz
+; CHECK-NEXT: blt s2, a3, .LBB8_2
 ; CHECK-NEXT: .LBB8_11: # %entry
-; CHECK-NEXT: mv s3, a3
-; CHECK-NEXT: fcvt.l.s a2, ft0, rtz
-; CHECK-NEXT: blt a1, a3, .LBB8_4
+; CHECK-NEXT: mv s2, a3
+; CHECK-NEXT: fcvt.l.s a2, fs0, rtz
+; CHECK-NEXT: blt a1, a3, .LBB8_3
 ; CHECK-NEXT: .LBB8_12: # %entry
 ; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: blt a2, a3, .LBB8_5
+; CHECK-NEXT: blt a2, a3, .LBB8_4
 ; CHECK-NEXT: .LBB8_13: # %entry
 ; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: bgtz a2, .LBB8_6
+; CHECK-NEXT: bgtz a2, .LBB8_5
 ; CHECK-NEXT: .LBB8_14: # %entry
 ; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bgtz a1, .LBB8_7
+; CHECK-NEXT: bgtz a1, .LBB8_6
 ; CHECK-NEXT: .LBB8_15: # %entry
 ; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bgtz s3, .LBB8_8
+; CHECK-NEXT: bgtz s2, .LBB8_7
 ; CHECK-NEXT: .LBB8_16: # %entry
-; CHECK-NEXT: li s3, 0
-; CHECK-NEXT: blez a0, .LBB8_9
-; CHECK-NEXT: j .LBB8_10
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: blez a0, .LBB8_8
+; CHECK-NEXT: j .LBB8_9
 entry:
   %conv = fptosi <4 x half> %x to <4 x i64>
   %0 = icmp slt <4 x i64> %conv,
@@ -597,12 +592,10 @@
 define <2 x i16> @stest_f64i16(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a1
-; CHECK-NEXT: fmv.d.x ft1, a0
-; CHECK-NEXT: fcvt.w.d a1, ft0, rtz
+; CHECK-NEXT: fcvt.w.d a1, fa1, rtz
 ; CHECK-NEXT: lui a0, 8
 ; CHECK-NEXT: addiw a2, a0, -1
-; CHECK-NEXT: fcvt.w.d a0, ft1, rtz
+; CHECK-NEXT: fcvt.w.d a0, fa0, rtz
 ; CHECK-NEXT: bge a1, a2, .LBB9_5
 ; CHECK-NEXT: # %bb.1: # %entry
 ; CHECK-NEXT: bge a0, a2, .LBB9_6
@@ -639,12 +632,10 @@
 define <2 x i16> @utest_f64i16(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: fmv.d.x ft1, a1
-; CHECK-NEXT: fcvt.wu.d a0, ft0, rtz
+; CHECK-NEXT: fcvt.wu.d a0, fa0, rtz
 ; CHECK-NEXT: lui a1, 16
 ; CHECK-NEXT: addiw a2, a1, -1
-; CHECK-NEXT: fcvt.wu.d a1, ft1, rtz
+; CHECK-NEXT: fcvt.wu.d a1, fa1, rtz
 ; CHECK-NEXT: bgeu a0, a2, .LBB10_3
 ; CHECK-NEXT: # %bb.1: # %entry
 ; CHECK-NEXT: bgeu a1, a2, .LBB10_4
@@ -667,12 +658,10 @@
 define <2 x i16> @ustest_f64i16(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a1
-; CHECK-NEXT: fmv.d.x ft1, a0
-; CHECK-NEXT: fcvt.w.d a1, ft0, rtz
+; CHECK-NEXT: fcvt.w.d a1, fa1, rtz
 ; CHECK-NEXT: lui a0, 16
 ; CHECK-NEXT: addiw a2, a0, -1
-; CHECK-NEXT: fcvt.w.d a0, ft1, rtz
+; CHECK-NEXT: fcvt.w.d a0, fa0, rtz
 ; CHECK-NEXT: bge a1, a2, .LBB11_5
 ; CHECK-NEXT: # %bb.1: # %entry
 ; CHECK-NEXT: bge a0, a2, .LBB11_6
@@ -707,63 +696,61 @@
 define <4 x i16> @stest_f32i16(<4 x float> %x) {
 ; CHECK-LABEL: stest_f32i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a4
-; CHECK-NEXT: fmv.w.x ft2, a3
-; CHECK-NEXT: fmv.w.x ft0, a2
-; CHECK-NEXT: fcvt.w.s a2, ft1, rtz
-; CHECK-NEXT: lui a3, 8
-; CHECK-NEXT: addiw a5, a3, -1
-; CHECK-NEXT: fcvt.w.s a3, ft2, rtz
-; CHECK-NEXT: blt a2, a5, .LBB12_2
+; CHECK-NEXT: fcvt.w.s a1, fa3, rtz
+; CHECK-NEXT: lui a2, 8
+; CHECK-NEXT: addiw a5, a2, -1
+; CHECK-NEXT: fcvt.w.s a2, fa2, rtz
+; CHECK-NEXT: bge a1, a5, .LBB12_10
 ; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT: bge a2, a5, .LBB12_11
 ; CHECK-NEXT: .LBB12_2: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a1
-; CHECK-NEXT: fcvt.w.s a1, ft0, rtz
-; CHECK-NEXT: bge a3, a5, .LBB12_11
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT: bge a1, a5, .LBB12_12
-; CHECK-NEXT: .LBB12_4: # %entry
+; CHECK-NEXT: fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT: bge a3, a5, .LBB12_12
+; CHECK-NEXT: .LBB12_3: # %entry
 ; CHECK-NEXT: bge a4, a5, .LBB12_13
-; CHECK-NEXT: .LBB12_5: # %entry
+; CHECK-NEXT: .LBB12_4: # %entry
 ; CHECK-NEXT: lui a5, 1048568
 ; CHECK-NEXT: bge a5, a4, .LBB12_14
+; CHECK-NEXT: .LBB12_5: # %entry
+; CHECK-NEXT: bge a5, a3, .LBB12_15
 ; CHECK-NEXT: .LBB12_6: # %entry
-; CHECK-NEXT: bge a5, a1, .LBB12_15
+; CHECK-NEXT: bge a5, a2, .LBB12_16
 ; CHECK-NEXT: .LBB12_7: # %entry
-; CHECK-NEXT: bge a5, a3, .LBB12_16
+; CHECK-NEXT: blt a5, a1, .LBB12_9
 ; CHECK-NEXT: .LBB12_8: # %entry
-; CHECK-NEXT: blt a5, a2, .LBB12_10
+; CHECK-NEXT: lui a1, 1048568
 ; CHECK-NEXT: .LBB12_9: # %entry
-; CHECK-NEXT: lui a2, 1048568
-; CHECK-NEXT: .LBB12_10: # %entry
-; CHECK-NEXT: sh a2, 6(a0)
-; CHECK-NEXT: sh a3, 4(a0)
-; CHECK-NEXT: sh a1, 2(a0)
+; CHECK-NEXT: sh a1, 6(a0)
+; CHECK-NEXT: sh a2, 4(a0)
+; CHECK-NEXT: sh a3, 2(a0)
 ; CHECK-NEXT: sh a4, 0(a0)
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB12_10: # %entry
+; CHECK-NEXT: mv a1, a5
+; CHECK-NEXT: fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT: blt a2, a5, .LBB12_2
 ; CHECK-NEXT: .LBB12_11: # %entry
-; CHECK-NEXT: mv a3, a5
-; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT: blt a1, a5, .LBB12_4
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT: blt a3, a5, .LBB12_3
 ; CHECK-NEXT: .LBB12_12: # %entry
-; CHECK-NEXT: mv a1, a5
-; CHECK-NEXT: blt a4, a5, .LBB12_5
+; CHECK-NEXT: mv a3, a5
+; CHECK-NEXT: blt a4, a5, .LBB12_4
 ; CHECK-NEXT: .LBB12_13: # %entry
 ; CHECK-NEXT: mv a4, a5
 ; CHECK-NEXT: lui a5, 1048568
-; CHECK-NEXT: blt a5, a4, .LBB12_6
+; CHECK-NEXT: blt a5, a4, .LBB12_5
 ; CHECK-NEXT: .LBB12_14: # %entry
 ; CHECK-NEXT: lui a4, 1048568
-; CHECK-NEXT: blt a5, a1, .LBB12_7
+; CHECK-NEXT: blt a5, a3, .LBB12_6
 ; CHECK-NEXT: .LBB12_15: # %entry
-; CHECK-NEXT: lui a1, 1048568
-; CHECK-NEXT: blt a5, a3, .LBB12_8
-; CHECK-NEXT: .LBB12_16: # %entry
+; CHECK-NEXT: lui a3, 1048568
+; CHECK-NEXT: blt a5, a2, .LBB12_7
+; CHECK-NEXT: .LBB12_16: # %entry
+; CHECK-NEXT: lui a2, 1048568
+; CHECK-NEXT: bge a5, a1, .LBB12_8
+; CHECK-NEXT: j .LBB12_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
   %0 = icmp slt <4 x i32> %conv,
@@ -777,41 +764,39 @@
 define <4 x i16> @utest_f32i16(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a1
-; CHECK-NEXT: fmv.w.x ft2, a2
-; CHECK-NEXT: fmv.w.x ft0, a3
-; CHECK-NEXT: fcvt.wu.s a1, ft1, rtz
+; CHECK-NEXT: fcvt.wu.s a1, fa0, rtz
 ; CHECK-NEXT: lui a2, 16
 ; CHECK-NEXT: addiw a3, a2, -1
-; CHECK-NEXT: fcvt.wu.s a2, ft2, rtz
-; CHECK-NEXT: bltu a1, a3, .LBB13_2
+; CHECK-NEXT: fcvt.wu.s a2, fa1, rtz
+; CHECK-NEXT: bgeu a1, a3, .LBB13_6
 ; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB13_2: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a4
-; CHECK-NEXT: fcvt.wu.s a4, ft0, rtz
+; CHECK-NEXT: fcvt.wu.s a4, fa2, rtz
 ; CHECK-NEXT: bgeu a2, a3, .LBB13_7
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: fcvt.wu.s a5, ft1, rtz
+; CHECK-NEXT: .LBB13_2: # %entry
+; CHECK-NEXT: fcvt.wu.s a5, fa3, rtz
 ; CHECK-NEXT: bgeu a4, a3, .LBB13_8
+; CHECK-NEXT: .LBB13_3: # %entry
+; CHECK-NEXT: bltu a5, a3, .LBB13_5
 ; CHECK-NEXT: .LBB13_4: # %entry
-; CHECK-NEXT: bltu a5, a3, .LBB13_6
-; CHECK-NEXT: .LBB13_5: # %entry
 ; CHECK-NEXT: mv a5, a3
-; CHECK-NEXT: .LBB13_6: # %entry
+; CHECK-NEXT: .LBB13_5: # %entry
 ; CHECK-NEXT: sh a5, 6(a0)
 ; CHECK-NEXT: sh a4, 4(a0)
 ; CHECK-NEXT: sh a2, 2(a0)
 ; CHECK-NEXT: sh a1, 0(a0)
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB13_6: # %entry
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: fcvt.wu.s a4, fa2, rtz
+; CHECK-NEXT: bltu a2, a3, .LBB13_2
 ; CHECK-NEXT: .LBB13_7: # %entry
 ; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: fcvt.wu.s a5, ft1, rtz
-; CHECK-NEXT: bltu a4, a3, .LBB13_4
+; CHECK-NEXT: fcvt.wu.s a5, fa3, rtz
+; CHECK-NEXT: bltu a4, a3, .LBB13_3
 ; CHECK-NEXT: .LBB13_8: # %entry
 ; CHECK-NEXT: mv a4, a3
-; CHECK-NEXT: bgeu a5, a3, .LBB13_5
-; CHECK-NEXT: j .LBB13_6
+; CHECK-NEXT: bgeu a5, a3, .LBB13_4
+; CHECK-NEXT: j .LBB13_5
 entry:
   %conv = fptoui <4 x float> %x to <4 x i32>
   %0 = icmp ult <4 x i32> %conv,
@@ -823,61 +808,59 @@
 define <4 x i16> @ustest_f32i16(<4 x float> %x) {
 ; CHECK-LABEL: ustest_f32i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a4
-; CHECK-NEXT: fmv.w.x ft2, a3
-; CHECK-NEXT: fmv.w.x ft0, a2
-; CHECK-NEXT: fcvt.w.s a2, ft1, rtz
-; CHECK-NEXT: lui a3, 16
-; CHECK-NEXT: addiw a5, a3, -1
-; CHECK-NEXT: fcvt.w.s a3, ft2, rtz
-; CHECK-NEXT: blt a2, a5, .LBB14_2
+; CHECK-NEXT: fcvt.w.s a1, fa3, rtz
+; CHECK-NEXT: lui a2, 16
+; CHECK-NEXT: addiw a5, a2, -1
+; CHECK-NEXT: fcvt.w.s a2, fa2, rtz
+; CHECK-NEXT: bge a1, a5, .LBB14_10
 ; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT: bge a2, a5, .LBB14_11
 ; CHECK-NEXT: .LBB14_2: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a1
-; CHECK-NEXT: fcvt.w.s a1, ft0, rtz
-; CHECK-NEXT: bge a3, a5, .LBB14_11
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT: bge a1, a5, .LBB14_12
-; CHECK-NEXT: .LBB14_4: # %entry
+; CHECK-NEXT: fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT: bge a3, a5, .LBB14_12
+; CHECK-NEXT: .LBB14_3: # %entry
 ; CHECK-NEXT: bge a4, a5, .LBB14_13
-; CHECK-NEXT: .LBB14_5: # %entry
+; CHECK-NEXT: .LBB14_4: # %entry
 ; CHECK-NEXT: blez a4, .LBB14_14
+; CHECK-NEXT: .LBB14_5: # %entry
+; CHECK-NEXT: blez a3, .LBB14_15
 ; CHECK-NEXT: .LBB14_6: # %entry
-; CHECK-NEXT: blez a1, .LBB14_15
+; CHECK-NEXT: blez a2, .LBB14_16
 ; CHECK-NEXT: .LBB14_7: # %entry
-; CHECK-NEXT: blez a3, .LBB14_16
+; CHECK-NEXT: bgtz a1, .LBB14_9
 ; CHECK-NEXT: .LBB14_8: # %entry
-; CHECK-NEXT: bgtz a2, .LBB14_10
+; CHECK-NEXT: li a1, 0
 ; CHECK-NEXT: .LBB14_9: # %entry
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: .LBB14_10: # %entry
-; CHECK-NEXT: sh a2, 6(a0)
-; CHECK-NEXT: sh a3, 4(a0)
-; CHECK-NEXT: sh a1, 2(a0)
+; CHECK-NEXT: sh a1, 6(a0)
+; CHECK-NEXT: sh a2, 4(a0)
+; CHECK-NEXT: sh a3, 2(a0)
 ; CHECK-NEXT: sh a4, 0(a0)
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB14_10: # %entry
+; CHECK-NEXT: mv a1, a5
+; CHECK-NEXT: fcvt.w.s a3, fa1, rtz
+; CHECK-NEXT: blt a2, a5, .LBB14_2
 ; CHECK-NEXT: .LBB14_11: # %entry
-; CHECK-NEXT: mv a3, a5
-; CHECK-NEXT: fcvt.w.s a4, ft1, rtz
-; CHECK-NEXT: blt a1, a5, .LBB14_4
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: fcvt.w.s a4, fa0, rtz
+; CHECK-NEXT: blt a3, a5, .LBB14_3
 ; CHECK-NEXT: .LBB14_12: # %entry
-; CHECK-NEXT: mv a1, a5
-; CHECK-NEXT: blt a4, a5, .LBB14_5
+; CHECK-NEXT: mv a3, a5
+; CHECK-NEXT: blt a4, a5, .LBB14_4
 ; CHECK-NEXT: .LBB14_13: # %entry
 ; CHECK-NEXT: mv a4, a5
-; CHECK-NEXT: bgtz a4, .LBB14_6
+; CHECK-NEXT: bgtz a4, .LBB14_5
 ; CHECK-NEXT: .LBB14_14: # %entry
 ; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: bgtz a1, .LBB14_7
+; CHECK-NEXT: bgtz a3, .LBB14_6
 ; CHECK-NEXT: .LBB14_15: # %entry
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bgtz a3, .LBB14_8
-; CHECK-NEXT: .LBB14_16: # %entry
 ; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: blez a2, .LBB14_9
-; CHECK-NEXT: j .LBB14_10
+; CHECK-NEXT: bgtz a2, .LBB14_7
+; CHECK-NEXT: .LBB14_16: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: blez a1, .LBB14_8
+; CHECK-NEXT: j .LBB14_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
   %0 = icmp slt <4 x i32> %conv,
@@ -891,18 +874,24 @@
 define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-LABEL: stest_f16i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -128
+; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs5, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs6, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset ra, -8
 ; CHECK-NEXT: .cfi_offset s0, -16
 ; CHECK-NEXT: .cfi_offset s1, -24
@@ -912,157 +901,169 @@
 ; CHECK-NEXT: .cfi_offset s5, -56
 ; CHECK-NEXT: .cfi_offset s6, -64
 ; CHECK-NEXT: .cfi_offset s7, -72
-; CHECK-NEXT: .cfi_offset s8, -80
-; CHECK-NEXT: lhu s6, 56(a1)
-; CHECK-NEXT: lhu s1, 0(a1)
-; CHECK-NEXT: lhu s2, 8(a1)
-; CHECK-NEXT: lhu s3, 16(a1)
-; CHECK-NEXT: lhu s4, 24(a1)
-; CHECK-NEXT: lhu s5, 32(a1)
+; CHECK-NEXT: .cfi_offset fs0, -80
+; CHECK-NEXT: .cfi_offset fs1, -88
+; CHECK-NEXT: .cfi_offset fs2, -96
+; CHECK-NEXT: .cfi_offset fs3, -104
+; CHECK-NEXT: .cfi_offset fs4, -112
+; CHECK-NEXT: .cfi_offset fs5, -120
+; CHECK-NEXT: .cfi_offset fs6, -128
+; CHECK-NEXT: lhu s1, 56(a1)
+; CHECK-NEXT: lhu s2, 0(a1)
+; CHECK-NEXT: lhu s3, 8(a1)
+; CHECK-NEXT: lhu s4, 16(a1)
+; CHECK-NEXT: lhu s5, 24(a1)
+; CHECK-NEXT: lhu s6, 32(a1)
 ; CHECK-NEXT: lhu s7, 40(a1)
 ; CHECK-NEXT: lhu a1, 48(a1)
 ; CHECK-NEXT: mv s0, a0
 ; CHECK-NEXT: mv a0, a1
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s8, a0
+; CHECK-NEXT: fmv.s fs6, fa0
 ; CHECK-NEXT: mv a0, s7
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s7, a0
+; CHECK-NEXT: fmv.s fs5, fa0
+; CHECK-NEXT: mv a0, s6
+; CHECK-NEXT: call __extendhfsf2@plt
+; CHECK-NEXT: fmv.s fs4, fa0
 ; CHECK-NEXT: mv a0, s5
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s5, a0
+; CHECK-NEXT: fmv.s fs3, fa0
 ; CHECK-NEXT: mv a0, s4
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: fmv.s fs2, fa0
 ; CHECK-NEXT: mv a0, s3
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: fmv.s fs1, fa0
 ; CHECK-NEXT: mv a0, s2
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: fmv.s fs0, fa0
+; CHECK-NEXT: fcvt.l.s s2, fs6, rtz
 ; CHECK-NEXT: mv a0, s1
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s1, a0
-; CHECK-NEXT: fmv.w.x ft0, s7
-; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT: fmv.w.x ft0, s8
-; CHECK-NEXT: fcvt.l.s s7, ft0, rtz
-; CHECK-NEXT: mv a0, s6
-; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT: fcvt.l.s a0, fa0, rtz
 ; CHECK-NEXT: lui a1, 8
 ; CHECK-NEXT: addiw a7, a1, -1
-; CHECK-NEXT: blt a0, a7, .LBB15_2
+; CHECK-NEXT: bge a0, a7, .LBB15_18
 ; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: mv a0, a7
+; CHECK-NEXT: fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT: bge s2, a7, .LBB15_19
 ; CHECK-NEXT: .LBB15_2: # %entry
-; CHECK-NEXT: fmv.w.x ft1, s5
-; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT: blt s7, a7, .LBB15_4
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: mv s7, a7
+; CHECK-NEXT: fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT: bge a1, a7, .LBB15_20
+; CHECK-NEXT: .LBB15_3: # %entry
+; CHECK-NEXT: fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT: bge a2, a7, .LBB15_21
 ; CHECK-NEXT: .LBB15_4: # %entry
-; CHECK-NEXT: fmv.w.x ft0, s4
-; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT: blt a1, a7, .LBB15_6
-; CHECK-NEXT: # %bb.5: # %entry
-; CHECK-NEXT: mv a1, a7
-; CHECK-NEXT: .LBB15_6: # %entry
-; CHECK-NEXT: fmv.w.x ft1, s3
-; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT: blt a2, a7, .LBB15_8
-; CHECK-NEXT: # %bb.7: # %entry
-; CHECK-NEXT: mv a2, a7
-; CHECK-NEXT: .LBB15_8: # %entry
-; CHECK-NEXT: fmv.w.x ft0, s2
-; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT: blt a3, a7, .LBB15_10
-; CHECK-NEXT: # %bb.9: # %entry
-; CHECK-NEXT: mv a3, a7
-; CHECK-NEXT: .LBB15_10: # %entry
-; CHECK-NEXT: fmv.w.x ft1, s1
-; CHECK-NEXT: fcvt.l.s a5, ft0, rtz
+; CHECK-NEXT: fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT: bge a3, a7, .LBB15_22
+; CHECK-NEXT: .LBB15_5: # %entry
+; CHECK-NEXT: fcvt.l.s a5, fs1, rtz
 ; CHECK-NEXT: bge a4, a7, .LBB15_23
-; CHECK-NEXT: # %bb.11: # %entry
-; CHECK-NEXT: fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT: .LBB15_6: # %entry
+; CHECK-NEXT: fcvt.l.s a6, fs0, rtz
 ; CHECK-NEXT: bge a5, a7, .LBB15_24
-; CHECK-NEXT: .LBB15_12: # %entry
+; CHECK-NEXT: .LBB15_7: # %entry
 ; CHECK-NEXT: bge a6, a7, .LBB15_25
-; CHECK-NEXT: .LBB15_13: # %entry
+; CHECK-NEXT: .LBB15_8: # %entry
 ; CHECK-NEXT: lui a7, 1048568
 ; CHECK-NEXT: bge a7, a6, .LBB15_26
-; CHECK-NEXT: .LBB15_14: # %entry
+; CHECK-NEXT: .LBB15_9: # %entry
 ; CHECK-NEXT: bge a7, a5, .LBB15_27
-; CHECK-NEXT: .LBB15_15: # %entry
+; CHECK-NEXT: .LBB15_10: # %entry
 ; CHECK-NEXT: bge a7, a4, .LBB15_28
-; CHECK-NEXT: .LBB15_16: # %entry
+; CHECK-NEXT: .LBB15_11: # %entry
 ; CHECK-NEXT: bge a7, a3, .LBB15_29
-; CHECK-NEXT: .LBB15_17: # %entry
+; CHECK-NEXT: .LBB15_12: # %entry
 ; CHECK-NEXT: bge a7, a2, .LBB15_30
-; CHECK-NEXT: .LBB15_18: # %entry
+; CHECK-NEXT: .LBB15_13: # %entry
 ; CHECK-NEXT: bge a7, a1, .LBB15_31
-; CHECK-NEXT: .LBB15_19: # %entry
-; CHECK-NEXT: bge a7, s7, .LBB15_32
-; CHECK-NEXT: .LBB15_20: # %entry
-; CHECK-NEXT: blt a7, a0, .LBB15_22
-; CHECK-NEXT: .LBB15_21: # %entry
+; CHECK-NEXT: .LBB15_14: # %entry
+; CHECK-NEXT: bge a7, s2, .LBB15_32
+; CHECK-NEXT: .LBB15_15: # %entry
+; CHECK-NEXT: blt a7, a0, .LBB15_17
+; CHECK-NEXT: .LBB15_16: # %entry
 ; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: .LBB15_22: # %entry
+; CHECK-NEXT: .LBB15_17: # %entry
 ; CHECK-NEXT: sh a0, 14(s0)
-; CHECK-NEXT: sh s7, 12(s0)
+; CHECK-NEXT: sh s2, 12(s0)
 ; CHECK-NEXT: sh a1, 10(s0)
 ; CHECK-NEXT: sh a2, 8(s0)
 ; CHECK-NEXT: sh a3, 6(s0)
 ; CHECK-NEXT: sh a4, 4(s0)
 ; CHECK-NEXT: sh a5, 2(s0)
 ; CHECK-NEXT: sh a6, 0(s0)
-; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 96
+; CHECK-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs5, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs6, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 128
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB15_18: # %entry
+; CHECK-NEXT: mv a0, a7
+; CHECK-NEXT: fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT: blt s2, a7, .LBB15_2
+; CHECK-NEXT: .LBB15_19: # %entry
+; CHECK-NEXT: mv s2, a7
+; CHECK-NEXT: fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT: blt a1, a7, .LBB15_3
+; CHECK-NEXT: .LBB15_20: # %entry
+; CHECK-NEXT: mv a1, a7
+; CHECK-NEXT: fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT: blt a2, a7, .LBB15_4
+; CHECK-NEXT: .LBB15_21: # %entry
+; CHECK-NEXT: mv a2, a7
+; CHECK-NEXT: fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT: blt a3, a7, .LBB15_5
+; CHECK-NEXT: .LBB15_22: # %entry
+; CHECK-NEXT: mv a3, a7
+; CHECK-NEXT: fcvt.l.s a5, fs1, rtz
+; CHECK-NEXT: blt a4, a7, .LBB15_6
 ; CHECK-NEXT: .LBB15_23: # %entry
 ; CHECK-NEXT: mv a4, a7
-; CHECK-NEXT: fcvt.l.s a6, ft1, rtz
-; CHECK-NEXT: blt a5, a7, .LBB15_12
+; CHECK-NEXT: fcvt.l.s a6, fs0, rtz
+; CHECK-NEXT: blt a5, a7, .LBB15_7
 ; CHECK-NEXT: .LBB15_24: # %entry
 ; CHECK-NEXT: mv a5, a7
-; CHECK-NEXT: blt a6, a7, .LBB15_13
+; CHECK-NEXT: blt a6, a7, .LBB15_8
 ; CHECK-NEXT: .LBB15_25: # %entry
 ; CHECK-NEXT: mv a6, a7
 ; CHECK-NEXT: lui a7, 1048568
-; CHECK-NEXT: blt a7, a6, .LBB15_14
+; CHECK-NEXT: blt a7, a6, .LBB15_9
 ; CHECK-NEXT: .LBB15_26: # %entry
 ; CHECK-NEXT: lui a6, 1048568
-; CHECK-NEXT: blt a7, a5, .LBB15_15
+; CHECK-NEXT: blt a7, a5, .LBB15_10
 ; CHECK-NEXT: .LBB15_27: # %entry
 ; CHECK-NEXT: lui a5, 1048568
-; CHECK-NEXT: blt a7, a4, .LBB15_16
+; CHECK-NEXT: blt a7, a4, .LBB15_11
 ; CHECK-NEXT: .LBB15_28: # %entry
 ; CHECK-NEXT: lui a4, 1048568
-; CHECK-NEXT: blt a7, a3, .LBB15_17
+; CHECK-NEXT: blt a7, a3, .LBB15_12
 ; CHECK-NEXT: .LBB15_29: # %entry
 ; CHECK-NEXT: lui a3, 1048568
-; CHECK-NEXT: blt a7, a2, .LBB15_18
+; CHECK-NEXT: blt a7, a2, .LBB15_13
 ; CHECK-NEXT: .LBB15_30: # %entry
 ; CHECK-NEXT: lui a2, 1048568
-; CHECK-NEXT: blt a7, a1, .LBB15_19
+; CHECK-NEXT: blt a7, a1, .LBB15_14
 ; CHECK-NEXT: .LBB15_31: # %entry
 ; CHECK-NEXT: lui a1, 1048568
-; CHECK-NEXT: blt a7, s7, .LBB15_20
+; CHECK-NEXT: blt a7, s2, .LBB15_15
 ; CHECK-NEXT: .LBB15_32: # %entry
-; CHECK-NEXT: lui s7, 1048568
-; CHECK-NEXT: bge a7, a0, .LBB15_21
-; CHECK-NEXT: j .LBB15_22
+; CHECK-NEXT: lui s2, 1048568
+; CHECK-NEXT: bge a7, a0, .LBB15_16
+; CHECK-NEXT: j .LBB15_17
 entry:
   %conv = fptosi <8 x half> %x to <8 x i32>
   %0 = icmp slt <8 x i32> %conv,
@@ -1076,18 +1077,24 @@
 define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-LABEL: utesth_f16i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -128
+; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs5, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs6, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset ra, -8
 ; CHECK-NEXT: .cfi_offset s0, -16
 ; CHECK-NEXT: .cfi_offset s1, -24
@@ -1097,115 +1104,127 @@
 ; CHECK-NEXT: .cfi_offset s5, -56
 ; CHECK-NEXT: .cfi_offset s6, -64
 ; CHECK-NEXT: .cfi_offset s7, -72
-; CHECK-NEXT: .cfi_offset s8, -80
-; CHECK-NEXT: lhu s6, 0(a1)
-; CHECK-NEXT: lhu s1, 56(a1)
-; CHECK-NEXT: lhu s2, 48(a1)
-; CHECK-NEXT: lhu s3, 40(a1)
-; CHECK-NEXT: lhu s4, 32(a1)
-; CHECK-NEXT: lhu s5, 24(a1)
+; CHECK-NEXT: .cfi_offset fs0, -80
+; CHECK-NEXT: .cfi_offset fs1, -88
+; CHECK-NEXT: .cfi_offset fs2, -96
+; CHECK-NEXT: .cfi_offset fs3, -104
+; CHECK-NEXT: .cfi_offset fs4, -112
+; CHECK-NEXT: .cfi_offset fs5, -120
+; CHECK-NEXT: .cfi_offset fs6, -128
+; CHECK-NEXT: lhu s1, 0(a1)
+; CHECK-NEXT: lhu s2, 56(a1)
+; CHECK-NEXT: lhu s3, 48(a1)
+; CHECK-NEXT: lhu s4, 40(a1)
+; CHECK-NEXT: lhu s5, 32(a1)
+; CHECK-NEXT: lhu s6, 24(a1)
 ; CHECK-NEXT: lhu s7, 16(a1)
 ; CHECK-NEXT: lhu a1, 8(a1)
 ; CHECK-NEXT: mv s0, a0
 ; CHECK-NEXT: mv a0, a1
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s8, a0
+; CHECK-NEXT: fmv.s fs6, fa0
 ; CHECK-NEXT: mv a0, s7
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s7, a0
+; CHECK-NEXT: fmv.s fs5, fa0
+; CHECK-NEXT: mv a0, s6
+; CHECK-NEXT: call __extendhfsf2@plt
+; CHECK-NEXT: fmv.s fs4, fa0
 ; CHECK-NEXT: mv a0, s5
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s5, a0
+; CHECK-NEXT: fmv.s fs3, fa0
 ; CHECK-NEXT: mv a0, s4
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: fmv.s fs2, fa0
 ; CHECK-NEXT: mv a0, s3
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: fmv.s fs1, fa0
 ; CHECK-NEXT: mv a0, s2
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: fmv.s fs0, fa0
+; CHECK-NEXT: fcvt.lu.s s2, fs6, rtz
 ; CHECK-NEXT: mv a0, s1
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s1, a0
-; CHECK-NEXT: fmv.w.x ft0, s7
-; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT: fmv.w.x ft0, s8
-; CHECK-NEXT: fcvt.lu.s s7, ft0, rtz
-; CHECK-NEXT: mv a0, s6
-; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz
+; CHECK-NEXT: fcvt.lu.s a0, fa0, rtz
 ; CHECK-NEXT: lui a1, 16
 ; CHECK-NEXT: addiw a1, a1, -1
-; CHECK-NEXT: bltu a0, a1, .LBB16_2
+; CHECK-NEXT: bgeu a0, a1, .LBB16_10
 ; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: fcvt.lu.s a2, fs5, rtz
+; CHECK-NEXT: bgeu s2, a1, .LBB16_11
 ; CHECK-NEXT: .LBB16_2: # %entry
-; CHECK-NEXT: fmv.w.x ft1, s5
-; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT: fcvt.lu.s a2, ft0, rtz
-; CHECK-NEXT: bltu s7, a1, .LBB16_4
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: mv s7, a1
+; CHECK-NEXT: fcvt.lu.s a3, fs4, rtz
+; CHECK-NEXT: bgeu a2, a1, .LBB16_12
+; CHECK-NEXT: .LBB16_3: # %entry
+; CHECK-NEXT: fcvt.lu.s a4, fs3, rtz
+; CHECK-NEXT: bgeu a3, a1, .LBB16_13
 ; CHECK-NEXT: .LBB16_4: # %entry
-; CHECK-NEXT: fmv.w.x ft0, s4
-; CHECK-NEXT: fcvt.lu.s a3, ft1, rtz
-; CHECK-NEXT: bltu a2, a1, .LBB16_6
-; CHECK-NEXT: # %bb.5: # %entry
-; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: .LBB16_6: # %entry
-; CHECK-NEXT: fmv.w.x ft1, s3
-; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz
-; CHECK-NEXT: bltu a3, a1, .LBB16_8
-; CHECK-NEXT: # %bb.7: # %entry
-; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB16_8: # %entry
-; CHECK-NEXT: fmv.w.x ft0, s2
-; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz
-; CHECK-NEXT: bltu a4, a1, .LBB16_10
-; CHECK-NEXT: # %bb.9: # %entry
-; CHECK-NEXT: mv a4, a1
-; CHECK-NEXT: .LBB16_10: # %entry
-; CHECK-NEXT: fmv.w.x ft1, s1
-; CHECK-NEXT: fcvt.lu.s a6, ft0, rtz
+; CHECK-NEXT: fcvt.lu.s a5, fs2, rtz
+; CHECK-NEXT: bgeu a4, a1, .LBB16_14
+; CHECK-NEXT: .LBB16_5: # %entry
+; CHECK-NEXT: fcvt.lu.s a6, fs1, rtz
 ; CHECK-NEXT: bgeu a5, a1, .LBB16_15
-; CHECK-NEXT: # %bb.11: # %entry
-; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz
+; CHECK-NEXT: .LBB16_6: # %entry
+; CHECK-NEXT: fcvt.lu.s a7, fs0, rtz
 ; CHECK-NEXT: bgeu a6, a1, .LBB16_16
-; CHECK-NEXT: .LBB16_12: # %entry
-; CHECK-NEXT: bltu a7, a1, .LBB16_14
-; CHECK-NEXT: .LBB16_13: # %entry
+; CHECK-NEXT: .LBB16_7: # %entry
+; CHECK-NEXT: bltu a7, a1, .LBB16_9
+; CHECK-NEXT: .LBB16_8: # %entry
 ; CHECK-NEXT: mv a7, a1
-; CHECK-NEXT: .LBB16_14: # %entry
+; CHECK-NEXT: .LBB16_9: # %entry
 ; CHECK-NEXT: sh a7, 14(s0)
 ; CHECK-NEXT: sh a6, 12(s0)
 ; CHECK-NEXT: sh a5, 10(s0)
 ; CHECK-NEXT: sh a4, 8(s0)
 ; CHECK-NEXT: sh a3, 6(s0)
 ; CHECK-NEXT: sh a2, 4(s0)
-; CHECK-NEXT: sh s7, 2(s0)
+; CHECK-NEXT: sh s2, 2(s0)
 ; CHECK-NEXT: sh a0, 0(s0)
-; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 96
+; CHECK-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs5, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs6, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 128
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB16_10: # %entry
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: fcvt.lu.s a2, fs5, rtz
+; CHECK-NEXT: bltu s2, a1, .LBB16_2
+; CHECK-NEXT: .LBB16_11: # %entry
+; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: fcvt.lu.s a3, fs4, rtz
+; CHECK-NEXT: bltu a2, a1, .LBB16_3
+; CHECK-NEXT: .LBB16_12: # %entry
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: fcvt.lu.s a4, fs3, rtz
+; CHECK-NEXT: bltu a3, a1, .LBB16_4
+; CHECK-NEXT: .LBB16_13: # %entry
+; CHECK-NEXT: mv a3, a1
+; CHECK-NEXT: fcvt.lu.s a5, fs2, rtz
+; CHECK-NEXT: bltu a4, a1, .LBB16_5
+; CHECK-NEXT: .LBB16_14: # %entry
+; CHECK-NEXT: mv a4, a1
+; CHECK-NEXT: fcvt.lu.s a6, fs1, rtz
+; CHECK-NEXT: bltu a5, a1, .LBB16_6
 ; CHECK-NEXT: .LBB16_15: # %entry
 ; CHECK-NEXT: mv a5, a1
-; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz
-; CHECK-NEXT: bltu a6, a1, .LBB16_12
+; CHECK-NEXT: fcvt.lu.s a7, fs0, rtz
+; CHECK-NEXT: bltu a6, a1, .LBB16_7
 ; CHECK-NEXT: .LBB16_16: # %entry
 ; CHECK-NEXT: mv a6, a1
-; CHECK-NEXT: bgeu a7, a1, .LBB16_13
-; CHECK-NEXT: j .LBB16_14
+; CHECK-NEXT: bgeu a7, a1, .LBB16_8
+; CHECK-NEXT: j .LBB16_9
 entry:
   %conv = fptoui <8 x half> %x to <8 x i32>
   %0 = icmp ult <8 x i32> %conv,
@@ -1217,18 +1236,24 @@
 define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-LABEL: ustest_f16i16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -128
+; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s3, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s4, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s5, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s6, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s7, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs3, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs5, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs6, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset ra, -8
 ; CHECK-NEXT: .cfi_offset s0, -16
 ; CHECK-NEXT: .cfi_offset s1, -24
@@ -1238,155 +1263,167 @@
 ; CHECK-NEXT: .cfi_offset s5, -56
 ; CHECK-NEXT: .cfi_offset s6, -64
 ; CHECK-NEXT: .cfi_offset s7, -72
-; CHECK-NEXT: .cfi_offset s8, -80
-; CHECK-NEXT: lhu s6, 56(a1)
-; CHECK-NEXT: lhu s1, 0(a1)
-; CHECK-NEXT: lhu s2, 8(a1)
-; CHECK-NEXT: lhu s3, 16(a1)
-; CHECK-NEXT: lhu s4, 24(a1)
-; CHECK-NEXT: lhu s5, 32(a1)
+; CHECK-NEXT: .cfi_offset fs0, -80
+; CHECK-NEXT: .cfi_offset fs1, -88
+; CHECK-NEXT: .cfi_offset fs2, -96
+; CHECK-NEXT: .cfi_offset fs3, -104
+; CHECK-NEXT: .cfi_offset fs4, -112
+; CHECK-NEXT: .cfi_offset fs5, -120
+; CHECK-NEXT: .cfi_offset fs6, -128
+; CHECK-NEXT: lhu s1, 56(a1)
+; CHECK-NEXT: lhu s2, 0(a1)
+; CHECK-NEXT: lhu s3, 8(a1)
+; CHECK-NEXT: lhu s4, 16(a1)
+; CHECK-NEXT: lhu s5, 24(a1)
+; CHECK-NEXT: lhu s6, 32(a1)
 ; CHECK-NEXT: lhu s7, 40(a1)
 ; CHECK-NEXT: lhu a1, 48(a1)
 ; CHECK-NEXT: mv s0, a0
 ; CHECK-NEXT: mv a0, a1
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s8, a0
+; CHECK-NEXT: fmv.s fs6, fa0
 ; CHECK-NEXT: mv a0, s7
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s7, a0
+; CHECK-NEXT: fmv.s fs5, fa0
+; CHECK-NEXT: mv a0, s6
+; CHECK-NEXT: call __extendhfsf2@plt
+; CHECK-NEXT: fmv.s fs4, fa0
 ; CHECK-NEXT: mv a0, s5
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s5, a0
+; CHECK-NEXT: fmv.s fs3, fa0
 ; CHECK-NEXT: mv a0, s4
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: fmv.s fs2, fa0
 ; CHECK-NEXT: mv a0, s3
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s3, a0
+; CHECK-NEXT: fmv.s fs1, fa0
 ; CHECK-NEXT: mv a0, s2
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s2, a0
+; CHECK-NEXT: fmv.s fs0, fa0
+; CHECK-NEXT: fcvt.l.s s2, fs6, rtz
 ; CHECK-NEXT: mv a0, s1
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s1, a0
-; CHECK-NEXT: fmv.w.x ft0, s7
-; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT: fmv.w.x ft0, s8
-; CHECK-NEXT: fcvt.l.s s7, ft0, rtz
-; CHECK-NEXT: mv a0, s6
-; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT: fcvt.l.s a0, fa0, rtz
 ; CHECK-NEXT: lui a1, 16
 ; CHECK-NEXT: addiw a7, a1, -1
-; CHECK-NEXT: blt a0, a7, .LBB17_2
+; CHECK-NEXT: bge a0, a7, .LBB17_18
 ; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: mv a0, a7
+; CHECK-NEXT: fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT: bge s2, a7, .LBB17_19
 ; CHECK-NEXT: .LBB17_2: # %entry
-; CHECK-NEXT: fmv.w.x ft1, s5
-; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT: blt s7, a7, .LBB17_4
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: mv s7, a7
+; CHECK-NEXT: fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT: bge a1, a7, .LBB17_20
+; CHECK-NEXT: .LBB17_3: # %entry
+; CHECK-NEXT: fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT: bge a2, a7, .LBB17_21
 ; CHECK-NEXT: .LBB17_4: # %entry
-; CHECK-NEXT: fmv.w.x ft0, s4
-; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT: blt a1, a7, .LBB17_6
-; CHECK-NEXT: # %bb.5: # %entry
-; CHECK-NEXT: mv a1, a7
-; CHECK-NEXT: .LBB17_6: # %entry
-; CHECK-NEXT: fmv.w.x ft1, s3
-; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT: blt a2, a7, .LBB17_8
-; CHECK-NEXT: # %bb.7: # %entry
-; CHECK-NEXT: mv a2, a7
-; CHECK-NEXT: .LBB17_8: # %entry
-; CHECK-NEXT: fmv.w.x ft0, s2
-; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT: blt a3, a7, .LBB17_10
-; CHECK-NEXT: # %bb.9: # %entry
-; CHECK-NEXT: mv a3, a7
-; CHECK-NEXT: .LBB17_10: # %entry
-; CHECK-NEXT: fmv.w.x ft1, s1
-; CHECK-NEXT: fcvt.l.s a5, ft0, rtz
+; CHECK-NEXT: fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT: bge a3, a7, .LBB17_22
+; CHECK-NEXT: .LBB17_5: # %entry
+; CHECK-NEXT: fcvt.l.s a5, fs1, rtz
 ; CHECK-NEXT: bge a4, a7, .LBB17_23
-; CHECK-NEXT: # %bb.11: # %entry
-; CHECK-NEXT: fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT: .LBB17_6: # %entry
+; CHECK-NEXT: fcvt.l.s a6, fs0, rtz
 ; CHECK-NEXT: bge a5, a7, .LBB17_24
-; CHECK-NEXT: .LBB17_12: # %entry
+; CHECK-NEXT: .LBB17_7: # %entry
 ; CHECK-NEXT: bge a6, a7, .LBB17_25
-; CHECK-NEXT: .LBB17_13: # %entry
+; CHECK-NEXT: .LBB17_8: # %entry
 ; CHECK-NEXT: blez a6, .LBB17_26
-; CHECK-NEXT: .LBB17_14: # %entry
+; CHECK-NEXT: .LBB17_9: # %entry
 ; CHECK-NEXT: blez a5, .LBB17_27
-; CHECK-NEXT: .LBB17_15: # %entry
+; CHECK-NEXT: .LBB17_10: # %entry
 ; CHECK-NEXT: blez a4, .LBB17_28
-; CHECK-NEXT: .LBB17_16: # %entry
+; CHECK-NEXT: .LBB17_11: # %entry
 ; CHECK-NEXT: blez a3, .LBB17_29
-; CHECK-NEXT: .LBB17_17: # %entry
+; CHECK-NEXT: .LBB17_12: # %entry
 ; CHECK-NEXT: blez a2, .LBB17_30
-; CHECK-NEXT: .LBB17_18: # %entry
+; CHECK-NEXT: .LBB17_13: # %entry
 ; CHECK-NEXT: blez a1, .LBB17_31
-; CHECK-NEXT: .LBB17_19: # %entry
-; CHECK-NEXT: blez s7, .LBB17_32
-; CHECK-NEXT: .LBB17_20: # %entry
-; CHECK-NEXT: bgtz a0, .LBB17_22
-; CHECK-NEXT: .LBB17_21: # %entry
+; CHECK-NEXT: .LBB17_14: # %entry
+; CHECK-NEXT: blez s2, .LBB17_32
+; CHECK-NEXT: .LBB17_15: # %entry
+; CHECK-NEXT: bgtz a0, .LBB17_17
+; CHECK-NEXT: .LBB17_16: # %entry
 ; CHECK-NEXT: li a0, 0
-; CHECK-NEXT: .LBB17_22: # %entry
+; CHECK-NEXT: .LBB17_17: # %entry
 ; CHECK-NEXT: sh a0, 14(s0)
-; CHECK-NEXT: sh s7, 12(s0)
+; CHECK-NEXT: sh s2, 12(s0)
 ; CHECK-NEXT: sh a1, 10(s0)
 ; CHECK-NEXT: sh a2, 8(s0)
 ; CHECK-NEXT: sh a3, 6(s0)
 ; CHECK-NEXT: sh a4, 4(s0)
 ; CHECK-NEXT: sh a5, 2(s0)
 ; CHECK-NEXT: sh a6, 0(s0)
-; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 96
+; CHECK-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s3, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s4, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s5, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s6, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s7, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs3, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs5, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs6, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 128
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB17_18: # %entry
+; CHECK-NEXT: mv a0, a7
+; CHECK-NEXT: fcvt.l.s a1, fs5, rtz
+; CHECK-NEXT: blt s2, a7, .LBB17_2
+; CHECK-NEXT: .LBB17_19: # %entry
+; CHECK-NEXT: mv s2, a7
+; CHECK-NEXT: fcvt.l.s a2, fs4, rtz
+; CHECK-NEXT: blt a1, a7, .LBB17_3
+; CHECK-NEXT: .LBB17_20: # %entry
+; CHECK-NEXT: mv a1, a7
+; CHECK-NEXT: fcvt.l.s a3, fs3, rtz
+; CHECK-NEXT: blt a2, a7, .LBB17_4
+; CHECK-NEXT: .LBB17_21: # %entry
+; CHECK-NEXT: mv a2, a7
+; CHECK-NEXT: fcvt.l.s a4, fs2, rtz
+; CHECK-NEXT: blt a3, a7, .LBB17_5
+; CHECK-NEXT: .LBB17_22: # %entry
+; CHECK-NEXT: mv a3, a7
+; CHECK-NEXT: fcvt.l.s a5, fs1, rtz
+; CHECK-NEXT: blt a4, a7, .LBB17_6
 ; CHECK-NEXT: .LBB17_23: # %entry
 ; CHECK-NEXT: mv a4, a7
-; CHECK-NEXT: fcvt.l.s a6, ft1, rtz
-; CHECK-NEXT: blt a5, a7, .LBB17_12
+; CHECK-NEXT: fcvt.l.s a6, fs0, rtz
+; CHECK-NEXT: blt a5, a7, .LBB17_7
 ; CHECK-NEXT: .LBB17_24: # %entry
 ; CHECK-NEXT: mv a5, a7
-; CHECK-NEXT: blt a6, a7, .LBB17_13
+; CHECK-NEXT: blt a6, a7, .LBB17_8
 ; CHECK-NEXT: .LBB17_25: # %entry
 ; CHECK-NEXT: mv a6, a7
-; CHECK-NEXT: bgtz a6, .LBB17_14
+; CHECK-NEXT: bgtz a6, .LBB17_9
 ; CHECK-NEXT: .LBB17_26: # %entry
 ; CHECK-NEXT: li a6, 0
-; CHECK-NEXT: bgtz a5, .LBB17_15
+; CHECK-NEXT: bgtz a5, .LBB17_10
 ; CHECK-NEXT: .LBB17_27: # %entry
 ; CHECK-NEXT: li a5, 0
-; CHECK-NEXT: bgtz a4, .LBB17_16
+; CHECK-NEXT: bgtz a4, .LBB17_11
 ; CHECK-NEXT: .LBB17_28: # %entry
 ; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: bgtz a3, .LBB17_17
+; CHECK-NEXT: bgtz a3, .LBB17_12
 ; CHECK-NEXT: .LBB17_29: # %entry
 ; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: bgtz a2, .LBB17_18
+; CHECK-NEXT: bgtz a2, .LBB17_13
 ; CHECK-NEXT: .LBB17_30: # %entry
 ; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: bgtz a1, .LBB17_19
+; CHECK-NEXT: bgtz a1, .LBB17_14
 ; CHECK-NEXT: .LBB17_31: # %entry
 ; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bgtz s7, .LBB17_20
+; CHECK-NEXT: bgtz s2, .LBB17_15
 ; CHECK-NEXT: .LBB17_32: # %entry
-; CHECK-NEXT: li s7, 0
-; CHECK-NEXT: blez a0, .LBB17_21
-; CHECK-NEXT: j .LBB17_22
+; CHECK-NEXT: li s2, 0
+; CHECK-NEXT: blez a0, .LBB17_16
+; CHECK-NEXT: j .LBB17_17
 entry:
   %conv = fptosi <8 x half> %x to <8 x i32>
   %0 = icmp slt <8 x i32> %conv,
@@ -1407,16 +1444,16 @@
 ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset ra, -8
 ; CHECK-NEXT: .cfi_offset s0, -16
 ; CHECK-NEXT: .cfi_offset s1, -24
-; CHECK-NEXT: .cfi_offset s2, -32
-; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: .cfi_offset fs0, -32
+; CHECK-NEXT: fmv.d fs0, fa1
 ; CHECK-NEXT: call __fixdfti@plt
 ; CHECK-NEXT: mv s0, a0
 ; CHECK-NEXT: mv s1, a1
-; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: fmv.d fa0, fs0
 ; CHECK-NEXT: call __fixdfti@plt
 ; CHECK-NEXT: mv a2, a0
 ; CHECK-NEXT: li a0, -1
@@ -1473,7 +1510,7 @@
 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: addi sp, sp, 32
 ; CHECK-NEXT: ret
 entry:
@@ -1494,17 +1531,17 @@
 ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset ra, -8
 ; CHECK-NEXT: .cfi_offset s0, -16
 ; CHECK-NEXT: .cfi_offset s1, -24
-; CHECK-NEXT: .cfi_offset s2, -32
-; CHECK-NEXT: mv s2, a0
-; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .cfi_offset fs0, -32
+; CHECK-NEXT: fmv.d fs0, fa0
+; CHECK-NEXT: fmv.d fa0, fa1
 ; CHECK-NEXT: call __fixunsdfti@plt
 ; CHECK-NEXT: mv s0, a0
 ; CHECK-NEXT: mv s1, a1
-; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: fmv.d fa0, fs0
 ; CHECK-NEXT: call __fixunsdfti@plt
 ; CHECK-NEXT: beqz a1, .LBB19_2
 ; CHECK-NEXT: # %bb.1: # %entry
@@ -1518,7 +1555,7 @@
 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: addi sp, sp, 32
 ; CHECK-NEXT: ret
 entry:
@@ -1537,17 +1574,17 @@
 ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset ra, -8
 ; CHECK-NEXT: .cfi_offset s0, -16
 ; CHECK-NEXT: .cfi_offset s1, -24
-; CHECK-NEXT: .cfi_offset s2, -32
-; CHECK-NEXT: mv s2, a0
-; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .cfi_offset fs0, -32
+; CHECK-NEXT: fmv.d fs0, fa0
+; CHECK-NEXT: fmv.d fa0, fa1
 ; CHECK-NEXT: call __fixdfti@plt
 ; CHECK-NEXT: mv s0, a0
 ; CHECK-NEXT: mv s1, a1
-; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: fmv.d fa0, fs0
 ; CHECK-NEXT: call __fixdfti@plt
 ; CHECK-NEXT: mv a2, a1
 ; CHECK-NEXT: bgtz a1, .LBB20_7
@@ -1597,7 +1634,7 @@
 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: addi sp, sp, 32
 ; CHECK-NEXT: ret
 entry:
@@ -1618,16 +1655,16 @@
 ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset ra, -8
 ; CHECK-NEXT: .cfi_offset s0, -16
 ; CHECK-NEXT: .cfi_offset s1, -24
-; CHECK-NEXT: .cfi_offset s2, -32
-; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: .cfi_offset fs0, -32
+; CHECK-NEXT: fmv.s fs0, fa1
 ; CHECK-NEXT: call __fixsfti@plt
 ; CHECK-NEXT: mv s0, a0
 ; CHECK-NEXT: mv s1, a1
-; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: fmv.s fa0, fs0
 ; CHECK-NEXT: call __fixsfti@plt
 ; CHECK-NEXT: mv a2, a0
 ; CHECK-NEXT: li a0, -1
@@ -1684,7 +1721,7 @@
 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: addi sp, sp, 32
 ; CHECK-NEXT: ret
 entry:
@@ -1705,17 +1742,17 @@
 ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset ra, -8
 ; CHECK-NEXT: .cfi_offset s0, -16
 ; CHECK-NEXT: .cfi_offset s1, -24
-; CHECK-NEXT: .cfi_offset s2, -32
-; CHECK-NEXT: mv s2, a0
-; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .cfi_offset fs0, -32
+; CHECK-NEXT: fmv.s fs0, fa0
+; CHECK-NEXT: fmv.s fa0, fa1
 ; CHECK-NEXT: call __fixunssfti@plt
 ; CHECK-NEXT: mv s0, a0
 ; CHECK-NEXT: mv s1, a1
-; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: fmv.s fa0, fs0
 ; CHECK-NEXT: call __fixunssfti@plt
 ; CHECK-NEXT: beqz a1, .LBB22_2
 ; CHECK-NEXT: # %bb.1: # %entry
@@ -1729,7 +1766,7 @@
 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: addi sp, sp, 32
 ; CHECK-NEXT: ret
 entry:
@@ -1748,17 +1785,17 @@
 ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset ra, -8
 ; CHECK-NEXT: .cfi_offset s0, -16
 ; CHECK-NEXT: .cfi_offset s1, -24
-; CHECK-NEXT: .cfi_offset s2, -32
-; CHECK-NEXT: mv s2, a0
-; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: .cfi_offset fs0, -32
+; CHECK-NEXT: fmv.s fs0, fa0
+; CHECK-NEXT: fmv.s fa0, fa1
 ; CHECK-NEXT: call __fixsfti@plt
 ; CHECK-NEXT: mv s0, a0
 ; CHECK-NEXT: mv s1, a1
-; CHECK-NEXT: mv a0, s2
+; CHECK-NEXT: fmv.s fa0, fs0
 ; CHECK-NEXT: call __fixsfti@plt
 ; CHECK-NEXT: mv a2, a1
 ; CHECK-NEXT: bgtz a1, .LBB23_7
@@ -1808,7 +1845,7 @@
 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: addi sp, sp, 32
 ; CHECK-NEXT: ret
 entry:
@@ -2045,12 +2082,10 @@
 define <2 x i32> @stest_f64i32_mm(<2 x double> %x) {
 ; CHECK-LABEL: stest_f64i32_mm:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a1
-; CHECK-NEXT: fmv.d.x ft1, a0
-; CHECK-NEXT: fcvt.l.d a1, ft0, rtz
+; CHECK-NEXT: fcvt.l.d a1, fa1, rtz
 ; CHECK-NEXT: lui a2, 524288
 ; CHECK-NEXT: addiw a3, a2, -1
-; CHECK-NEXT: fcvt.l.d a0, ft1, rtz
+; CHECK-NEXT: fcvt.l.d a0, fa0, rtz
 ; CHECK-NEXT: bge a1, a3, .LBB27_5
 ; CHECK-NEXT: # %bb.1: # %entry
 ; CHECK-NEXT: bge a0, a3, .LBB27_6
@@ -2083,12 +2118,10 @@
 define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i32_mm:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: fmv.d.x ft1, a1
-; CHECK-NEXT: fcvt.lu.d a0, ft0, rtz
+; CHECK-NEXT: fcvt.lu.d a0, fa0, rtz
 ; CHECK-NEXT: li a1, -1
 ; CHECK-NEXT: srli a2, a1, 32
-; CHECK-NEXT: fcvt.lu.d a1, ft1, rtz
+; CHECK-NEXT: fcvt.lu.d a1, fa1, rtz
 ; CHECK-NEXT: bgeu a0, a2, .LBB28_3
 ; CHECK-NEXT: # %bb.1: # %entry
 ; CHECK-NEXT: bgeu a1, a2, .LBB28_4
@@ -2110,12 +2143,10 @@
 define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) {
 ; CHECK-LABEL: ustest_f64i32_mm:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a1
-; CHECK-NEXT: fmv.d.x ft1, a0
-; CHECK-NEXT: fcvt.l.d a1, ft0, rtz
+; CHECK-NEXT: fcvt.l.d a1, fa1, rtz
 ; CHECK-NEXT: li a0, -1
 ; CHECK-NEXT: srli a2, a0, 32
-; CHECK-NEXT: fcvt.l.d a0, ft1, rtz
+; CHECK-NEXT: fcvt.l.d a0, fa0, rtz
 ; CHECK-NEXT: bge a1, a2, .LBB29_5
 ; CHECK-NEXT: # %bb.1: # %entry
 ; CHECK-NEXT: bge a0, a2, .LBB29_6
@@ -2148,61 +2179,59 @@
 define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: stest_f32i32_mm:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a4
-; CHECK-NEXT: fmv.w.x ft2, a3
-; CHECK-NEXT: fmv.w.x ft0, a2
-; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT: lui a4, 524288
-; CHECK-NEXT: addiw a6, a4, -1
-; CHECK-NEXT: fcvt.l.s a3, ft2, rtz
-; CHECK-NEXT: blt a2, a6, .LBB30_2
+; CHECK-NEXT: fcvt.l.s a1, fa3, rtz
+; CHECK-NEXT: lui a3, 524288
+; CHECK-NEXT: addiw a6, a3, -1
+; CHECK-NEXT: fcvt.l.s a2, fa2, rtz
+; CHECK-NEXT: bge a1, a6, .LBB30_10
 ; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: mv a2, a6
+; CHECK-NEXT: fcvt.l.s a4, fa1, rtz
+; CHECK-NEXT: bge a2, a6, .LBB30_11
 ; CHECK-NEXT: .LBB30_2: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a1
-; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT: bge a3, a6, .LBB30_11
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: fcvt.l.s a5, ft1, rtz
-; CHECK-NEXT: bge a1, a6, .LBB30_12
-; CHECK-NEXT: .LBB30_4: # %entry
+; CHECK-NEXT: fcvt.l.s a5, fa0, rtz
+; CHECK-NEXT: bge a4, a6, .LBB30_12
+; CHECK-NEXT: .LBB30_3: # %entry
 ; CHECK-NEXT: bge a5, a6, .LBB30_13
+; CHECK-NEXT: .LBB30_4: # %entry
+; CHECK-NEXT: bge a3, a5, .LBB30_14
 ; CHECK-NEXT: .LBB30_5: # %entry
-; CHECK-NEXT: bge a4, a5, .LBB30_14
+; CHECK-NEXT: bge a3, a4, .LBB30_15
 ; CHECK-NEXT: .LBB30_6: # %entry
-; CHECK-NEXT: bge a4, a1, .LBB30_15
+; CHECK-NEXT: bge a3, a2, .LBB30_16
 ; CHECK-NEXT: .LBB30_7: # %entry
-; CHECK-NEXT: bge a4, a3, .LBB30_16
+; CHECK-NEXT: blt a3, a1, .LBB30_9
 ; CHECK-NEXT: .LBB30_8: # %entry
-; CHECK-NEXT: blt a4, a2, .LBB30_10
+; CHECK-NEXT: lui a1, 524288
 ; CHECK-NEXT: .LBB30_9: # %entry
-; CHECK-NEXT: lui a2, 524288
-; CHECK-NEXT: .LBB30_10: # %entry
-; CHECK-NEXT: sw a2, 12(a0)
-; CHECK-NEXT: sw a3, 8(a0)
-; CHECK-NEXT: sw a1, 4(a0)
+; CHECK-NEXT: sw a1, 12(a0)
+; CHECK-NEXT: sw a2, 8(a0)
+; CHECK-NEXT: sw a4, 4(a0)
 ; CHECK-NEXT: sw a5, 0(a0)
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB30_10: # %entry
+; CHECK-NEXT: mv a1, a6
+; CHECK-NEXT: fcvt.l.s a4, fa1, rtz
+; CHECK-NEXT: blt a2, a6, .LBB30_2
 ; CHECK-NEXT: .LBB30_11: # %entry
-; CHECK-NEXT: mv a3, a6
-; CHECK-NEXT: fcvt.l.s a5, ft1, rtz
-; CHECK-NEXT: blt a1, a6, .LBB30_4
+; CHECK-NEXT: mv a2, a6
+; CHECK-NEXT: fcvt.l.s a5, fa0, rtz
+; CHECK-NEXT: blt a4, a6, .LBB30_3
 ; CHECK-NEXT: .LBB30_12: # %entry
-; CHECK-NEXT: mv a1, a6
-; CHECK-NEXT: blt a5, a6, .LBB30_5
+; CHECK-NEXT: mv a4, a6
+; CHECK-NEXT: blt a5, a6, .LBB30_4
 ; CHECK-NEXT: .LBB30_13: # %entry
 ; CHECK-NEXT: mv a5, a6
-; CHECK-NEXT: blt a4, a5, .LBB30_6
+; CHECK-NEXT: blt a3, a5, .LBB30_5
 ; CHECK-NEXT: .LBB30_14: # %entry
 ; CHECK-NEXT: lui a5, 524288
-; CHECK-NEXT: blt a4, a1, .LBB30_7
+; CHECK-NEXT: blt a3, a4, .LBB30_6
 ; CHECK-NEXT: .LBB30_15: # %entry
-; CHECK-NEXT: lui a1, 524288
-; CHECK-NEXT: blt a4, a3, .LBB30_8
+; CHECK-NEXT: lui a4, 524288
+; CHECK-NEXT: blt a3, a2, .LBB30_7
 ; CHECK-NEXT: .LBB30_16: # %entry
-; CHECK-NEXT: lui a3, 524288
-; CHECK-NEXT: bge a4, a2, .LBB30_9
-; CHECK-NEXT: j .LBB30_10
+; CHECK-NEXT: lui a2, 524288
+; CHECK-NEXT: bge a3, a1, .LBB30_8
+; CHECK-NEXT: j .LBB30_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> )
@@ -2214,41 +2243,39 @@
 define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i32_mm:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a1
-; CHECK-NEXT: fmv.w.x ft2, a2
-; CHECK-NEXT: fmv.w.x ft0, a3
-; CHECK-NEXT: fcvt.lu.s a1, ft1, rtz
+; CHECK-NEXT: fcvt.lu.s a1, fa0, rtz
 ; CHECK-NEXT: li a2, -1
 ; CHECK-NEXT: srli a3, a2, 32
-; CHECK-NEXT: fcvt.lu.s a2, ft2, rtz
-; CHECK-NEXT: bltu a1, a3, .LBB31_2
+; CHECK-NEXT: fcvt.lu.s a2, fa1, rtz
+; CHECK-NEXT: bgeu a1, a3, .LBB31_6
 ; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: mv a1, a3
-; CHECK-NEXT: .LBB31_2: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a4
-; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz
+; CHECK-NEXT: fcvt.lu.s a4, fa2, rtz
 ; CHECK-NEXT: bgeu a2, a3, .LBB31_7
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz
+; CHECK-NEXT: .LBB31_2: # %entry
+; CHECK-NEXT: fcvt.lu.s a5, fa3, rtz
 ; CHECK-NEXT: bgeu a4, a3, .LBB31_8
+; CHECK-NEXT: .LBB31_3: # %entry
+; CHECK-NEXT: bltu a5, a3, .LBB31_5
 ; CHECK-NEXT: .LBB31_4: # %entry
-; CHECK-NEXT: bltu a5, a3, .LBB31_6
-; CHECK-NEXT: .LBB31_5: # %entry
 ; CHECK-NEXT: mv a5, a3
-; CHECK-NEXT: .LBB31_6: # %entry
+; CHECK-NEXT: .LBB31_5: # %entry
 ; CHECK-NEXT: sw a5, 12(a0)
 ; CHECK-NEXT: sw a4, 8(a0)
 ; CHECK-NEXT: sw a2, 4(a0)
 ; CHECK-NEXT: sw a1, 0(a0)
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB31_6: # %entry
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: fcvt.lu.s a4, fa2, rtz
+; CHECK-NEXT: bltu a2, a3, .LBB31_2
 ; CHECK-NEXT: .LBB31_7: # %entry
 ; CHECK-NEXT: mv a2, a3
-; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz
-; CHECK-NEXT: bltu a4, a3, .LBB31_4
+; CHECK-NEXT: fcvt.lu.s a5, fa3, rtz
+; CHECK-NEXT: bltu a4, a3, .LBB31_3
 ; CHECK-NEXT: .LBB31_8: # %entry
 ; CHECK-NEXT: mv a4, a3
-; CHECK-NEXT: bgeu a5, a3, .LBB31_5
-; CHECK-NEXT: j .LBB31_6
+; CHECK-NEXT: bgeu a5, a3, .LBB31_4
+; CHECK-NEXT: j .LBB31_5
 entry:
   %conv = fptoui <4 x float> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> )
@@ -2259,61 +2286,59 @@
 define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: ustest_f32i32_mm:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a4
-; CHECK-NEXT: fmv.w.x ft2, a3
-; CHECK-NEXT: fmv.w.x ft0, a2
-; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT: li a3, -1
-; CHECK-NEXT: srli a5, a3, 32
-; CHECK-NEXT: fcvt.l.s a3, ft2, rtz
-; CHECK-NEXT: blt a2, a5, .LBB32_2
+; CHECK-NEXT: fcvt.l.s a1, fa3, rtz
+; CHECK-NEXT: li a2, -1
+; CHECK-NEXT: srli a5, a2, 32
+; CHECK-NEXT: fcvt.l.s a2, fa2, rtz
+; CHECK-NEXT: bge a1, a5, .LBB32_10
 ; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: fcvt.l.s a3, fa1, rtz
+; CHECK-NEXT: bge a2, a5, .LBB32_11
 ; CHECK-NEXT: .LBB32_2: # %entry
-; CHECK-NEXT: fmv.w.x ft1, a1
-; CHECK-NEXT: fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT: bge a3, a5, .LBB32_11
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT: bge a1, a5, .LBB32_12
-; CHECK-NEXT: .LBB32_4: # %entry
+; CHECK-NEXT: fcvt.l.s a4, fa0, rtz
+; CHECK-NEXT: bge a3, a5, .LBB32_12
+; CHECK-NEXT: .LBB32_3: # %entry
 ; CHECK-NEXT: bge a4, a5, .LBB32_13
-; CHECK-NEXT: .LBB32_5: # %entry
+; CHECK-NEXT: .LBB32_4: # %entry
 ; CHECK-NEXT: blez a4, .LBB32_14
+; CHECK-NEXT: .LBB32_5: # %entry
+; CHECK-NEXT: blez a3, .LBB32_15
 ; CHECK-NEXT: .LBB32_6: # %entry
-; CHECK-NEXT: blez a1, .LBB32_15
+; CHECK-NEXT: blez a2, .LBB32_16
 ; CHECK-NEXT: .LBB32_7: # %entry
-; CHECK-NEXT: blez a3, .LBB32_16
+; CHECK-NEXT: bgtz a1, .LBB32_9
 ; CHECK-NEXT: .LBB32_8: # %entry
-; CHECK-NEXT: bgtz a2, .LBB32_10
+; CHECK-NEXT: li a1, 0
 ; CHECK-NEXT: .LBB32_9: # %entry
-; CHECK-NEXT: li a2, 0
-; CHECK-NEXT: .LBB32_10: # %entry
-; CHECK-NEXT: sw a2, 12(a0)
-; CHECK-NEXT: sw a3, 8(a0)
-; CHECK-NEXT: sw a1, 4(a0)
+; CHECK-NEXT: sw a1, 12(a0)
+; CHECK-NEXT: sw a2, 8(a0)
+; CHECK-NEXT: sw a3, 4(a0)
 ; CHECK-NEXT: sw a4, 0(a0)
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB32_10: # %entry
+; CHECK-NEXT: mv a1, a5
+; CHECK-NEXT: fcvt.l.s a3, fa1, rtz
+; CHECK-NEXT: blt a2, a5, .LBB32_2
 ; CHECK-NEXT: .LBB32_11: # %entry
-; CHECK-NEXT: mv a3, a5
-; CHECK-NEXT: fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT: blt a1, a5, .LBB32_4
+; CHECK-NEXT: mv a2, a5
+; CHECK-NEXT: fcvt.l.s a4, fa0, rtz
+; CHECK-NEXT: blt a3, a5, .LBB32_3
 ; CHECK-NEXT: .LBB32_12: # %entry
-; CHECK-NEXT: mv a1, a5
-; CHECK-NEXT: blt a4, a5, .LBB32_5
+; CHECK-NEXT: mv a3, a5
+; CHECK-NEXT: blt a4, a5, .LBB32_4
 ; CHECK-NEXT: .LBB32_13: # %entry
 ; CHECK-NEXT: mv a4, a5
-; CHECK-NEXT: bgtz a4, .LBB32_6
+; CHECK-NEXT: bgtz a4, .LBB32_5
 ; CHECK-NEXT: .LBB32_14: # %entry
 ; CHECK-NEXT: li a4, 0
-; CHECK-NEXT: bgtz a1, .LBB32_7
+; CHECK-NEXT: bgtz a3, .LBB32_6
 ; CHECK-NEXT: .LBB32_15: # %entry
-; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bgtz a3, .LBB32_8
-; CHECK-NEXT: .LBB32_16: # %entry
 ; CHECK-NEXT: li a3, 0
-; CHECK-NEXT: blez a2, .LBB32_9
-; CHECK-NEXT: j .LBB32_10
+; CHECK-NEXT: bgtz a2, .LBB32_7
+; CHECK-NEXT: .LBB32_16: # %entry
+; CHECK-NEXT: li a2, 0
+; CHECK-NEXT: blez a1, .LBB32_8
+; CHECK-NEXT: j .LBB32_9
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> )
@@ -2332,63 +2357,59 @@
 ; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset ra, -8
 ; CHECK-NEXT: .cfi_offset s0, -16
 ; CHECK-NEXT: .cfi_offset s1, -24
 ; CHECK-NEXT: .cfi_offset s2, -32
 ; CHECK-NEXT: .cfi_offset s3, -40
-; CHECK-NEXT: .cfi_offset s4, -48
-; CHECK-NEXT: lhu s2, 24(a1)
-; CHECK-NEXT: lhu s1, 0(a1)
+; CHECK-NEXT: .cfi_offset fs0, -48
+; CHECK-NEXT: .cfi_offset fs1, -56
+; CHECK-NEXT: .cfi_offset fs2, -64
+; CHECK-NEXT: lhu s1, 24(a1)
+; CHECK-NEXT: lhu s2, 0(a1)
 ; CHECK-NEXT: lhu s3, 8(a1)
 ; CHECK-NEXT: lhu a1, 16(a1)
 ; CHECK-NEXT: mv s0, a0
 ; CHECK-NEXT: mv a0, a1
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: fmv.s fs2, fa0
 ; CHECK-NEXT: mv a0, s3
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s3, a0
-; CHECK-NEXT: mv a0, s1
-; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s1, a0
-; CHECK-NEXT: fmv.w.x ft0, s3
-; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT: fmv.w.x ft0, s4
-; CHECK-NEXT: fcvt.l.s s3, ft0, rtz
+; CHECK-NEXT: fmv.s fs1, fa0
 ; CHECK-NEXT: mv a0, s2
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: fcvt.l.s a0, ft0, rtz
+; CHECK-NEXT: fmv.s fs0, fa0
+; CHECK-NEXT: fcvt.l.s s2, fs2, rtz
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __extendhfsf2@plt
+; CHECK-NEXT: fcvt.l.s a0, fa0, rtz
 ; CHECK-NEXT: lui a1, 524288
 ; CHECK-NEXT: addiw a4, a1, -1
-; CHECK-NEXT: blt a0, a4, .LBB33_2
+; CHECK-NEXT: bge a0, a4, .LBB33_10
 ; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: mv a0, a4
+; CHECK-NEXT: fcvt.l.s a2, fs1, rtz
+; CHECK-NEXT: bge s2, a4, .LBB33_11
 ; CHECK-NEXT: .LBB33_2: # %entry
-; CHECK-NEXT: fmv.w.x ft0, s1
-; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT: fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT: bge s3, a4, .LBB33_11
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
+; CHECK-NEXT: fcvt.l.s a3, fs0, rtz
 ; CHECK-NEXT: bge a2, a4, .LBB33_12
-; CHECK-NEXT: .LBB33_4: # %entry
+; CHECK-NEXT: .LBB33_3: # %entry
 ; CHECK-NEXT: bge a3, a4, .LBB33_13
-; CHECK-NEXT: .LBB33_5: # %entry
+; CHECK-NEXT: .LBB33_4: # %entry
 ; CHECK-NEXT: bge a1, a3, .LBB33_14
-; CHECK-NEXT: .LBB33_6: # %entry
+; CHECK-NEXT: .LBB33_5: # %entry
 ; CHECK-NEXT: bge a1, a2, .LBB33_15
+; CHECK-NEXT: .LBB33_6: # %entry
+; CHECK-NEXT: bge a1, s2, .LBB33_16
 ; CHECK-NEXT: .LBB33_7: # %entry
-; CHECK-NEXT: bge a1, s3, .LBB33_16
+; CHECK-NEXT: blt a1, a0, .LBB33_9
 ; CHECK-NEXT: .LBB33_8: # %entry
-; CHECK-NEXT: blt a1, a0, .LBB33_10
-; CHECK-NEXT: .LBB33_9: # %entry
 ; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: .LBB33_10: # %entry
+; CHECK-NEXT: .LBB33_9: # %entry
 ; CHECK-NEXT: sw a0, 12(s0)
-; CHECK-NEXT: sw s3, 8(s0)
+; CHECK-NEXT: sw s2, 8(s0)
 ; CHECK-NEXT: sw a2, 4(s0)
 ; CHECK-NEXT: sw a3, 0(s0)
 ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
@@ -2396,29 +2417,35 @@
 ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: addi sp, sp, 64
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB33_10: # %entry
+; CHECK-NEXT: mv a0, a4
+; CHECK-NEXT: fcvt.l.s a2, fs1, rtz
+; CHECK-NEXT: blt s2, a4, .LBB33_2
 ; CHECK-NEXT: .LBB33_11: # %entry
-; CHECK-NEXT: mv s3, a4
-; CHECK-NEXT: fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT: blt a2, a4, .LBB33_4
+; CHECK-NEXT: mv s2, a4
+; CHECK-NEXT: fcvt.l.s a3, fs0, rtz
+; CHECK-NEXT: blt a2, a4, .LBB33_3
 ; CHECK-NEXT: .LBB33_12: # %entry
 ; CHECK-NEXT: mv a2, a4
-; CHECK-NEXT: blt a3, a4, .LBB33_5
+; CHECK-NEXT: blt a3, a4, .LBB33_4
 ; CHECK-NEXT: .LBB33_13: # %entry
 ; CHECK-NEXT: mv a3, a4
-; CHECK-NEXT: blt a1, a3, .LBB33_6
+; CHECK-NEXT: blt a1, a3, .LBB33_5
 ; CHECK-NEXT: .LBB33_14: # %entry
 ; CHECK-NEXT: lui a3, 524288
-; CHECK-NEXT: blt a1, a2, .LBB33_7
+; CHECK-NEXT: blt a1, a2, .LBB33_6
 ; CHECK-NEXT: .LBB33_15: # %entry
 ; CHECK-NEXT: lui a2, 524288
-; CHECK-NEXT: blt a1, s3, .LBB33_8
+; CHECK-NEXT: blt a1, s2, .LBB33_7
 ; CHECK-NEXT: .LBB33_16: # %entry
-; CHECK-NEXT: lui s3, 524288
-; CHECK-NEXT: bge a1, a0, .LBB33_9
-; CHECK-NEXT: j .LBB33_10
+; CHECK-NEXT: lui s2, 524288
+; CHECK-NEXT: bge a1, a0, .LBB33_8
+; CHECK-NEXT: j .LBB33_9
 entry:
   %conv = fptosi <4 x half> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> )
@@ -2437,73 +2464,75 @@
 ; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset ra, -8
 ; CHECK-NEXT: .cfi_offset s0, -16
 ; CHECK-NEXT: .cfi_offset s1, -24
 ; CHECK-NEXT: .cfi_offset s2, -32
 ; CHECK-NEXT: .cfi_offset s3, -40
-; CHECK-NEXT: .cfi_offset s4, -48
-; CHECK-NEXT: lhu s2, 0(a1)
-; CHECK-NEXT: lhu s1, 24(a1)
+; CHECK-NEXT: .cfi_offset fs0, -48
+; CHECK-NEXT: .cfi_offset fs1, -56
+; CHECK-NEXT: .cfi_offset fs2, -64
+; CHECK-NEXT: lhu s1, 0(a1)
+; CHECK-NEXT: lhu s2, 24(a1)
 ; CHECK-NEXT: lhu s3, 16(a1)
 ; CHECK-NEXT: lhu a1, 8(a1)
 ; CHECK-NEXT: mv s0, a0
 ; CHECK-NEXT: mv a0, a1
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s4, a0
+; CHECK-NEXT: fmv.s fs2, fa0
 ; CHECK-NEXT: mv a0, s3
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s3, a0
-; CHECK-NEXT: mv a0, s1
-; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: mv s1, a0
-; CHECK-NEXT: fmv.w.x ft0, s3
-; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT: fmv.w.x ft0, s4
-; CHECK-NEXT: fcvt.lu.s s3, ft0, rtz
+; CHECK-NEXT: fmv.s fs1, fa0
 ; CHECK-NEXT: mv a0, s2
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz
+; CHECK-NEXT: fmv.s fs0, fa0
+; CHECK-NEXT: fcvt.lu.s s2, fs2, rtz
+; CHECK-NEXT: mv a0, s1
+; CHECK-NEXT: call __extendhfsf2@plt
+; CHECK-NEXT: fcvt.lu.s a0, fa0, rtz
 ; CHECK-NEXT: li a1, -1
 ; CHECK-NEXT: srli a1, a1, 32
-; CHECK-NEXT: bltu a0, a1, .LBB34_2
+; CHECK-NEXT: bgeu a0, a1, .LBB34_6
 ; CHECK-NEXT: # %bb.1: # %entry
-; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: fcvt.lu.s a2, fs1, rtz
+; CHECK-NEXT: bgeu s2, a1, .LBB34_7
 ; CHECK-NEXT: .LBB34_2: # %entry
-; CHECK-NEXT: fmv.w.x ft0, s1
-; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT: fcvt.lu.s a2, ft1, rtz
-; CHECK-NEXT: bgeu s3, a1, .LBB34_7
-; CHECK-NEXT: # %bb.3: # %entry
-; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz
+; CHECK-NEXT: fcvt.lu.s a3, fs0, rtz
 ; CHECK-NEXT: bgeu a2, a1, .LBB34_8
+; CHECK-NEXT: .LBB34_3: # %entry
+; CHECK-NEXT: bltu a3, a1, .LBB34_5
 ; CHECK-NEXT: .LBB34_4: # %entry
-; CHECK-NEXT: bltu a3, a1, .LBB34_6
-; CHECK-NEXT: .LBB34_5: # %entry
 ; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: .LBB34_6: # %entry
+; CHECK-NEXT: .LBB34_5: # %entry
 ; CHECK-NEXT: sw a3, 12(s0)
 ; CHECK-NEXT: sw a2, 8(s0)
-; CHECK-NEXT: sw s3, 4(s0)
+; CHECK-NEXT: sw s2, 4(s0)
 ; CHECK-NEXT: sw a0, 0(s0)
 ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT: addi sp, sp, 64
 ; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB34_6: # %entry
+; CHECK-NEXT: mv a0, a1
+; CHECK-NEXT: fcvt.lu.s a2, fs1, rtz
+; CHECK-NEXT: bltu s2, a1, .LBB34_2
 ; CHECK-NEXT: .LBB34_7: # %entry
-; CHECK-NEXT: mv s3, a1
-; CHECK-NEXT: fcvt.lu.s a3, ft0, rtz
-; CHECK-NEXT: bltu a2, a1, .LBB34_4
+; CHECK-NEXT: mv s2, a1
+; CHECK-NEXT: fcvt.lu.s a3, fs0, rtz
+; CHECK-NEXT: bltu a2, a1, .LBB34_3
 ; CHECK-NEXT: .LBB34_8: # %entry
 ; CHECK-NEXT: mv a2, a1
-; CHECK-NEXT: bgeu a3, a1, .LBB34_5
-; CHECK-NEXT: j .LBB34_6
+; CHECK-NEXT: bgeu a3, a1, .LBB34_4
+; CHECK-NEXT: j .LBB34_5
 entry:
   %conv = fptoui <4 x half> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> )
@@ -2521,63 +2550,59 @@ ; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 ; CHECK-NEXT: .cfi_offset s2, -32 ; CHECK-NEXT: .cfi_offset s3, -40 -; CHECK-NEXT: .cfi_offset s4, -48 -; CHECK-NEXT: lhu s2, 24(a1) -; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: .cfi_offset fs0, -48 +; CHECK-NEXT: .cfi_offset fs1, -56 +; CHECK-NEXT: .cfi_offset fs2, -64 +; CHECK-NEXT: lhu s1, 24(a1) +; CHECK-NEXT: lhu s2, 0(a1) ; CHECK-NEXT: lhu s3, 8(a1) ; CHECK-NEXT: lhu a1, 16(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: fmv.s fs2, fa0 ; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s3, a0 -; CHECK-NEXT: mv a0, s1 -; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: fmv.w.x ft0, s3 -; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s4 -; CHECK-NEXT: fcvt.l.s s3, ft0, rtz +; CHECK-NEXT: fmv.s fs1, fa0 ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK-NEXT: fmv.s fs0, fa0 +; CHECK-NEXT: fcvt.l.s s2, fs2, rtz +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: call __extendhfsf2@plt +; CHECK-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NEXT: li a1, -1 ; CHECK-NEXT: srli a3, a1, 32 -; CHECK-NEXT: blt a0, a3, .LBB35_2 +; CHECK-NEXT: bge a0, a3, .LBB35_10 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, a3 +; CHECK-NEXT: fcvt.l.s a1, fs1, rtz +; CHECK-NEXT: bge s2, a3, .LBB35_11 ; CHECK-NEXT: .LBB35_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s1 -; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload -; CHECK-NEXT: fcvt.l.s a1, ft1, rtz -; CHECK-NEXT: bge s3, a3, .LBB35_11 -; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.l.s a2, ft0, rtz +; CHECK-NEXT: fcvt.l.s a2, fs0, rtz ; CHECK-NEXT: bge a1, a3, .LBB35_12 -; CHECK-NEXT: .LBB35_4: # %entry +; CHECK-NEXT: .LBB35_3: # %entry ; CHECK-NEXT: bge a2, a3, .LBB35_13 -; CHECK-NEXT: .LBB35_5: # %entry +; CHECK-NEXT: .LBB35_4: # %entry ; CHECK-NEXT: blez a2, .LBB35_14 -; CHECK-NEXT: .LBB35_6: # %entry +; CHECK-NEXT: .LBB35_5: # %entry ; CHECK-NEXT: blez a1, .LBB35_15 +; CHECK-NEXT: .LBB35_6: # %entry +; CHECK-NEXT: blez s2, .LBB35_16 ; CHECK-NEXT: .LBB35_7: # %entry -; CHECK-NEXT: blez s3, .LBB35_16 +; CHECK-NEXT: bgtz a0, .LBB35_9 ; CHECK-NEXT: .LBB35_8: # %entry -; CHECK-NEXT: bgtz a0, .LBB35_10 -; CHECK-NEXT: .LBB35_9: # %entry ; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: .LBB35_10: # %entry +; CHECK-NEXT: .LBB35_9: # %entry ; CHECK-NEXT: sw a0, 12(s0) -; CHECK-NEXT: sw s3, 8(s0) +; CHECK-NEXT: sw s2, 8(s0) ; CHECK-NEXT: sw a1, 4(s0) ; CHECK-NEXT: sw a2, 0(s0) ; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload @@ -2585,29 +2610,35 @@ ; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s3, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s4, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret +; 
CHECK-NEXT: .LBB35_10: # %entry +; CHECK-NEXT: mv a0, a3 +; CHECK-NEXT: fcvt.l.s a1, fs1, rtz +; CHECK-NEXT: blt s2, a3, .LBB35_2 ; CHECK-NEXT: .LBB35_11: # %entry -; CHECK-NEXT: mv s3, a3 -; CHECK-NEXT: fcvt.l.s a2, ft0, rtz -; CHECK-NEXT: blt a1, a3, .LBB35_4 +; CHECK-NEXT: mv s2, a3 +; CHECK-NEXT: fcvt.l.s a2, fs0, rtz +; CHECK-NEXT: blt a1, a3, .LBB35_3 ; CHECK-NEXT: .LBB35_12: # %entry ; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: blt a2, a3, .LBB35_5 +; CHECK-NEXT: blt a2, a3, .LBB35_4 ; CHECK-NEXT: .LBB35_13: # %entry ; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: bgtz a2, .LBB35_6 +; CHECK-NEXT: bgtz a2, .LBB35_5 ; CHECK-NEXT: .LBB35_14: # %entry ; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bgtz a1, .LBB35_7 +; CHECK-NEXT: bgtz a1, .LBB35_6 ; CHECK-NEXT: .LBB35_15: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz s3, .LBB35_8 +; CHECK-NEXT: bgtz s2, .LBB35_7 ; CHECK-NEXT: .LBB35_16: # %entry -; CHECK-NEXT: li s3, 0 -; CHECK-NEXT: blez a0, .LBB35_9 -; CHECK-NEXT: j .LBB35_10 +; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: blez a0, .LBB35_8 +; CHECK-NEXT: j .LBB35_9 entry: %conv = fptosi <4 x half> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -2621,12 +2652,10 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) { ; CHECK-LABEL: stest_f64i16_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a1 -; CHECK-NEXT: fmv.d.x ft1, a0 -; CHECK-NEXT: fcvt.w.d a1, ft0, rtz +; CHECK-NEXT: fcvt.w.d a1, fa1, rtz ; CHECK-NEXT: lui a0, 8 ; CHECK-NEXT: addiw a2, a0, -1 -; CHECK-NEXT: fcvt.w.d a0, ft1, rtz +; CHECK-NEXT: fcvt.w.d a0, fa0, rtz ; CHECK-NEXT: bge a1, a2, .LBB36_5 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: bge a0, a2, .LBB36_6 @@ -2661,12 +2690,10 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) { ; CHECK-LABEL: utest_f64i16_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: fmv.d.x ft1, a1 -; CHECK-NEXT: fcvt.wu.d a0, ft0, rtz +; CHECK-NEXT: fcvt.wu.d a0, fa0, rtz ; CHECK-NEXT: lui a1, 16 ; CHECK-NEXT: addiw a2, a1, -1 -; CHECK-NEXT: fcvt.wu.d a1, ft1, rtz +; CHECK-NEXT: fcvt.wu.d a1, fa1, rtz ; CHECK-NEXT: bgeu a0, a2, .LBB37_3 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: bgeu a1, a2, .LBB37_4 @@ -2688,12 +2715,10 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i16_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a1 -; CHECK-NEXT: fmv.d.x ft1, a0 -; CHECK-NEXT: fcvt.w.d a1, ft0, rtz +; CHECK-NEXT: fcvt.w.d a1, fa1, rtz ; CHECK-NEXT: lui a0, 16 ; CHECK-NEXT: addiw a2, a0, -1 -; CHECK-NEXT: fcvt.w.d a0, ft1, rtz +; CHECK-NEXT: fcvt.w.d a0, fa0, rtz ; CHECK-NEXT: bge a1, a2, .LBB38_5 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: bge a0, a2, .LBB38_6 @@ -2726,63 +2751,61 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) { ; CHECK-LABEL: stest_f32i16_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft1, a4 -; CHECK-NEXT: fmv.w.x ft2, a3 -; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: fcvt.w.s a2, ft1, rtz -; CHECK-NEXT: lui a3, 8 -; CHECK-NEXT: addiw a5, a3, -1 -; CHECK-NEXT: fcvt.w.s a3, ft2, rtz -; CHECK-NEXT: blt a2, a5, .LBB39_2 +; CHECK-NEXT: fcvt.w.s a1, fa3, rtz +; CHECK-NEXT: lui a2, 8 +; CHECK-NEXT: addiw a5, a2, -1 +; CHECK-NEXT: fcvt.w.s a2, fa2, rtz +; CHECK-NEXT: bge a1, a5, .LBB39_10 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a2, a5 +; CHECK-NEXT: fcvt.w.s a3, fa1, rtz +; CHECK-NEXT: bge a2, a5, .LBB39_11 ; CHECK-NEXT: .LBB39_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, a1 -; CHECK-NEXT: fcvt.w.s a1, ft0, rtz -; CHECK-NEXT: bge a3, a5, .LBB39_11 -; 
CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.w.s a4, ft1, rtz -; CHECK-NEXT: bge a1, a5, .LBB39_12 -; CHECK-NEXT: .LBB39_4: # %entry +; CHECK-NEXT: fcvt.w.s a4, fa0, rtz +; CHECK-NEXT: bge a3, a5, .LBB39_12 +; CHECK-NEXT: .LBB39_3: # %entry ; CHECK-NEXT: bge a4, a5, .LBB39_13 -; CHECK-NEXT: .LBB39_5: # %entry +; CHECK-NEXT: .LBB39_4: # %entry ; CHECK-NEXT: lui a5, 1048568 ; CHECK-NEXT: bge a5, a4, .LBB39_14 +; CHECK-NEXT: .LBB39_5: # %entry +; CHECK-NEXT: bge a5, a3, .LBB39_15 ; CHECK-NEXT: .LBB39_6: # %entry -; CHECK-NEXT: bge a5, a1, .LBB39_15 +; CHECK-NEXT: bge a5, a2, .LBB39_16 ; CHECK-NEXT: .LBB39_7: # %entry -; CHECK-NEXT: bge a5, a3, .LBB39_16 +; CHECK-NEXT: blt a5, a1, .LBB39_9 ; CHECK-NEXT: .LBB39_8: # %entry -; CHECK-NEXT: blt a5, a2, .LBB39_10 +; CHECK-NEXT: lui a1, 1048568 ; CHECK-NEXT: .LBB39_9: # %entry -; CHECK-NEXT: lui a2, 1048568 -; CHECK-NEXT: .LBB39_10: # %entry -; CHECK-NEXT: sh a2, 6(a0) -; CHECK-NEXT: sh a3, 4(a0) -; CHECK-NEXT: sh a1, 2(a0) +; CHECK-NEXT: sh a1, 6(a0) +; CHECK-NEXT: sh a2, 4(a0) +; CHECK-NEXT: sh a3, 2(a0) ; CHECK-NEXT: sh a4, 0(a0) ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB39_10: # %entry +; CHECK-NEXT: mv a1, a5 +; CHECK-NEXT: fcvt.w.s a3, fa1, rtz +; CHECK-NEXT: blt a2, a5, .LBB39_2 ; CHECK-NEXT: .LBB39_11: # %entry -; CHECK-NEXT: mv a3, a5 -; CHECK-NEXT: fcvt.w.s a4, ft1, rtz -; CHECK-NEXT: blt a1, a5, .LBB39_4 +; CHECK-NEXT: mv a2, a5 +; CHECK-NEXT: fcvt.w.s a4, fa0, rtz +; CHECK-NEXT: blt a3, a5, .LBB39_3 ; CHECK-NEXT: .LBB39_12: # %entry -; CHECK-NEXT: mv a1, a5 -; CHECK-NEXT: blt a4, a5, .LBB39_5 +; CHECK-NEXT: mv a3, a5 +; CHECK-NEXT: blt a4, a5, .LBB39_4 ; CHECK-NEXT: .LBB39_13: # %entry ; CHECK-NEXT: mv a4, a5 ; CHECK-NEXT: lui a5, 1048568 -; CHECK-NEXT: blt a5, a4, .LBB39_6 +; CHECK-NEXT: blt a5, a4, .LBB39_5 ; CHECK-NEXT: .LBB39_14: # %entry ; CHECK-NEXT: lui a4, 1048568 -; CHECK-NEXT: blt a5, a1, .LBB39_7 +; CHECK-NEXT: blt a5, a3, .LBB39_6 ; CHECK-NEXT: .LBB39_15: # %entry -; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: blt a5, a3, .LBB39_8 -; CHECK-NEXT: .LBB39_16: # %entry ; CHECK-NEXT: lui a3, 1048568 -; CHECK-NEXT: bge a5, a2, .LBB39_9 -; CHECK-NEXT: j .LBB39_10 +; CHECK-NEXT: blt a5, a2, .LBB39_7 +; CHECK-NEXT: .LBB39_16: # %entry +; CHECK-NEXT: lui a2, 1048568 +; CHECK-NEXT: bge a5, a1, .LBB39_8 +; CHECK-NEXT: j .LBB39_9 entry: %conv = fptosi <4 x float> %x to <4 x i32> %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> ) @@ -2794,41 +2817,39 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) { ; CHECK-LABEL: utest_f32i16_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft1, a1 -; CHECK-NEXT: fmv.w.x ft2, a2 -; CHECK-NEXT: fmv.w.x ft0, a3 -; CHECK-NEXT: fcvt.wu.s a1, ft1, rtz +; CHECK-NEXT: fcvt.wu.s a1, fa0, rtz ; CHECK-NEXT: lui a2, 16 ; CHECK-NEXT: addiw a3, a2, -1 -; CHECK-NEXT: fcvt.wu.s a2, ft2, rtz -; CHECK-NEXT: bltu a1, a3, .LBB40_2 +; CHECK-NEXT: fcvt.wu.s a2, fa1, rtz +; CHECK-NEXT: bgeu a1, a3, .LBB40_6 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a1, a3 -; CHECK-NEXT: .LBB40_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, a4 -; CHECK-NEXT: fcvt.wu.s a4, ft0, rtz +; CHECK-NEXT: fcvt.wu.s a4, fa2, rtz ; CHECK-NEXT: bgeu a2, a3, .LBB40_7 -; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.wu.s a5, ft1, rtz +; CHECK-NEXT: .LBB40_2: # %entry +; CHECK-NEXT: fcvt.wu.s a5, fa3, rtz ; CHECK-NEXT: bgeu a4, a3, .LBB40_8 +; CHECK-NEXT: .LBB40_3: # %entry +; CHECK-NEXT: bltu a5, a3, .LBB40_5 ; CHECK-NEXT: .LBB40_4: # %entry -; CHECK-NEXT: bltu a5, a3, .LBB40_6 -; CHECK-NEXT: .LBB40_5: # %entry ; 
CHECK-NEXT: mv a5, a3 -; CHECK-NEXT: .LBB40_6: # %entry +; CHECK-NEXT: .LBB40_5: # %entry ; CHECK-NEXT: sh a5, 6(a0) ; CHECK-NEXT: sh a4, 4(a0) ; CHECK-NEXT: sh a2, 2(a0) ; CHECK-NEXT: sh a1, 0(a0) ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB40_6: # %entry +; CHECK-NEXT: mv a1, a3 +; CHECK-NEXT: fcvt.wu.s a4, fa2, rtz +; CHECK-NEXT: bltu a2, a3, .LBB40_2 ; CHECK-NEXT: .LBB40_7: # %entry ; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: fcvt.wu.s a5, ft1, rtz -; CHECK-NEXT: bltu a4, a3, .LBB40_4 +; CHECK-NEXT: fcvt.wu.s a5, fa3, rtz +; CHECK-NEXT: bltu a4, a3, .LBB40_3 ; CHECK-NEXT: .LBB40_8: # %entry ; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: bgeu a5, a3, .LBB40_5 -; CHECK-NEXT: j .LBB40_6 +; CHECK-NEXT: bgeu a5, a3, .LBB40_4 +; CHECK-NEXT: j .LBB40_5 entry: %conv = fptoui <4 x float> %x to <4 x i32> %spec.store.select = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %conv, <4 x i32> ) @@ -2839,61 +2860,59 @@ define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) { ; CHECK-LABEL: ustest_f32i16_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft1, a4 -; CHECK-NEXT: fmv.w.x ft2, a3 -; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: fcvt.w.s a2, ft1, rtz -; CHECK-NEXT: lui a3, 16 -; CHECK-NEXT: addiw a5, a3, -1 -; CHECK-NEXT: fcvt.w.s a3, ft2, rtz -; CHECK-NEXT: blt a2, a5, .LBB41_2 +; CHECK-NEXT: fcvt.w.s a1, fa3, rtz +; CHECK-NEXT: lui a2, 16 +; CHECK-NEXT: addiw a5, a2, -1 +; CHECK-NEXT: fcvt.w.s a2, fa2, rtz +; CHECK-NEXT: bge a1, a5, .LBB41_10 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a2, a5 +; CHECK-NEXT: fcvt.w.s a3, fa1, rtz +; CHECK-NEXT: bge a2, a5, .LBB41_11 ; CHECK-NEXT: .LBB41_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, a1 -; CHECK-NEXT: fcvt.w.s a1, ft0, rtz -; CHECK-NEXT: bge a3, a5, .LBB41_11 -; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: fcvt.w.s a4, ft1, rtz -; CHECK-NEXT: bge a1, a5, .LBB41_12 -; CHECK-NEXT: .LBB41_4: # %entry +; CHECK-NEXT: fcvt.w.s a4, fa0, rtz +; CHECK-NEXT: bge a3, a5, .LBB41_12 +; CHECK-NEXT: .LBB41_3: # %entry ; CHECK-NEXT: bge a4, a5, .LBB41_13 -; CHECK-NEXT: .LBB41_5: # %entry +; CHECK-NEXT: .LBB41_4: # %entry ; CHECK-NEXT: blez a4, .LBB41_14 +; CHECK-NEXT: .LBB41_5: # %entry +; CHECK-NEXT: blez a3, .LBB41_15 ; CHECK-NEXT: .LBB41_6: # %entry -; CHECK-NEXT: blez a1, .LBB41_15 +; CHECK-NEXT: blez a2, .LBB41_16 ; CHECK-NEXT: .LBB41_7: # %entry -; CHECK-NEXT: blez a3, .LBB41_16 +; CHECK-NEXT: bgtz a1, .LBB41_9 ; CHECK-NEXT: .LBB41_8: # %entry -; CHECK-NEXT: bgtz a2, .LBB41_10 +; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: .LBB41_9: # %entry -; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: .LBB41_10: # %entry -; CHECK-NEXT: sh a2, 6(a0) -; CHECK-NEXT: sh a3, 4(a0) -; CHECK-NEXT: sh a1, 2(a0) +; CHECK-NEXT: sh a1, 6(a0) +; CHECK-NEXT: sh a2, 4(a0) +; CHECK-NEXT: sh a3, 2(a0) ; CHECK-NEXT: sh a4, 0(a0) ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB41_10: # %entry +; CHECK-NEXT: mv a1, a5 +; CHECK-NEXT: fcvt.w.s a3, fa1, rtz +; CHECK-NEXT: blt a2, a5, .LBB41_2 ; CHECK-NEXT: .LBB41_11: # %entry -; CHECK-NEXT: mv a3, a5 -; CHECK-NEXT: fcvt.w.s a4, ft1, rtz -; CHECK-NEXT: blt a1, a5, .LBB41_4 +; CHECK-NEXT: mv a2, a5 +; CHECK-NEXT: fcvt.w.s a4, fa0, rtz +; CHECK-NEXT: blt a3, a5, .LBB41_3 ; CHECK-NEXT: .LBB41_12: # %entry -; CHECK-NEXT: mv a1, a5 -; CHECK-NEXT: blt a4, a5, .LBB41_5 +; CHECK-NEXT: mv a3, a5 +; CHECK-NEXT: blt a4, a5, .LBB41_4 ; CHECK-NEXT: .LBB41_13: # %entry ; CHECK-NEXT: mv a4, a5 -; CHECK-NEXT: bgtz a4, .LBB41_6 +; CHECK-NEXT: bgtz a4, .LBB41_5 ; CHECK-NEXT: .LBB41_14: # %entry ; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: bgtz a1, .LBB41_7 +; CHECK-NEXT: bgtz a3, .LBB41_6 ; CHECK-NEXT: .LBB41_15: # 
%entry -; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz a3, .LBB41_8 -; CHECK-NEXT: .LBB41_16: # %entry ; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: blez a2, .LBB41_9 -; CHECK-NEXT: j .LBB41_10 +; CHECK-NEXT: bgtz a2, .LBB41_7 +; CHECK-NEXT: .LBB41_16: # %entry +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: blez a1, .LBB41_8 +; CHECK-NEXT: j .LBB41_9 entry: %conv = fptosi <4 x float> %x to <4 x i32> %spec.store.select = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> ) @@ -2905,18 +2924,24 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: stest_f16i16_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -96 -; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -128 +; CHECK-NEXT: .cfi_def_cfa_offset 128 +; CHECK-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 104(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 96(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s3, 88(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s4, 80(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s5, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s6, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s7, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs1, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs2, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs3, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs4, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs5, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs6, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -2926,157 +2951,169 @@ ; CHECK-NEXT: .cfi_offset s5, -56 ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 -; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: lhu s6, 56(a1) -; CHECK-NEXT: lhu s1, 0(a1) -; CHECK-NEXT: lhu s2, 8(a1) -; CHECK-NEXT: lhu s3, 16(a1) -; CHECK-NEXT: lhu s4, 24(a1) -; CHECK-NEXT: lhu s5, 32(a1) +; CHECK-NEXT: .cfi_offset fs0, -80 +; CHECK-NEXT: .cfi_offset fs1, -88 +; CHECK-NEXT: .cfi_offset fs2, -96 +; CHECK-NEXT: .cfi_offset fs3, -104 +; CHECK-NEXT: .cfi_offset fs4, -112 +; CHECK-NEXT: .cfi_offset fs5, -120 +; CHECK-NEXT: .cfi_offset fs6, -128 +; CHECK-NEXT: lhu s1, 56(a1) +; CHECK-NEXT: lhu s2, 0(a1) +; CHECK-NEXT: lhu s3, 8(a1) +; CHECK-NEXT: lhu s4, 16(a1) +; CHECK-NEXT: lhu s5, 24(a1) +; CHECK-NEXT: lhu s6, 32(a1) ; CHECK-NEXT: lhu s7, 40(a1) ; CHECK-NEXT: lhu a1, 48(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: fmv.s fs6, fa0 ; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s7, a0 +; CHECK-NEXT: fmv.s fs5, fa0 +; CHECK-NEXT: mv a0, s6 +; CHECK-NEXT: call __extendhfsf2@plt +; CHECK-NEXT: fmv.s fs4, fa0 ; CHECK-NEXT: mv a0, s5 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s5, a0 +; CHECK-NEXT: fmv.s fs3, fa0 ; CHECK-NEXT: mv a0, s4 ; CHECK-NEXT: call __extendhfsf2@plt -; 
CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: fmv.s fs2, fa0 ; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: fmv.s fs1, fa0 ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: fmv.s fs0, fa0 +; CHECK-NEXT: fcvt.l.s s2, fs6, rtz ; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: fmv.w.x ft0, s7 -; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s8 -; CHECK-NEXT: fcvt.l.s s7, ft0, rtz -; CHECK-NEXT: mv a0, s6 -; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: addiw a7, a1, -1 -; CHECK-NEXT: blt a0, a7, .LBB42_2 +; CHECK-NEXT: bge a0, a7, .LBB42_18 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, a7 +; CHECK-NEXT: fcvt.l.s a1, fs5, rtz +; CHECK-NEXT: bge s2, a7, .LBB42_19 ; CHECK-NEXT: .LBB42_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s5 -; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload -; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: blt s7, a7, .LBB42_4 -; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s7, a7 +; CHECK-NEXT: fcvt.l.s a2, fs4, rtz +; CHECK-NEXT: bge a1, a7, .LBB42_20 +; CHECK-NEXT: .LBB42_3: # %entry +; CHECK-NEXT: fcvt.l.s a3, fs3, rtz +; CHECK-NEXT: bge a2, a7, .LBB42_21 ; CHECK-NEXT: .LBB42_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s4 -; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: blt a1, a7, .LBB42_6 -; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: mv a1, a7 -; CHECK-NEXT: .LBB42_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s3 -; CHECK-NEXT: fcvt.l.s a3, ft0, rtz -; CHECK-NEXT: blt a2, a7, .LBB42_8 -; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a2, a7 -; CHECK-NEXT: .LBB42_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s2 -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a3, a7, .LBB42_10 -; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, a7 -; CHECK-NEXT: .LBB42_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s1 -; CHECK-NEXT: fcvt.l.s a5, ft0, rtz +; CHECK-NEXT: fcvt.l.s a4, fs2, rtz +; CHECK-NEXT: bge a3, a7, .LBB42_22 +; CHECK-NEXT: .LBB42_5: # %entry +; CHECK-NEXT: fcvt.l.s a5, fs1, rtz ; CHECK-NEXT: bge a4, a7, .LBB42_23 -; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: .LBB42_6: # %entry +; CHECK-NEXT: fcvt.l.s a6, fs0, rtz ; CHECK-NEXT: bge a5, a7, .LBB42_24 -; CHECK-NEXT: .LBB42_12: # %entry +; CHECK-NEXT: .LBB42_7: # %entry ; CHECK-NEXT: bge a6, a7, .LBB42_25 -; CHECK-NEXT: .LBB42_13: # %entry +; CHECK-NEXT: .LBB42_8: # %entry ; CHECK-NEXT: lui a7, 1048568 ; CHECK-NEXT: bge a7, a6, .LBB42_26 -; CHECK-NEXT: .LBB42_14: # %entry +; CHECK-NEXT: .LBB42_9: # %entry ; CHECK-NEXT: bge a7, a5, .LBB42_27 -; CHECK-NEXT: .LBB42_15: # %entry +; CHECK-NEXT: .LBB42_10: # %entry ; CHECK-NEXT: bge a7, a4, .LBB42_28 -; CHECK-NEXT: .LBB42_16: # %entry +; CHECK-NEXT: .LBB42_11: # %entry ; CHECK-NEXT: bge a7, a3, .LBB42_29 -; CHECK-NEXT: .LBB42_17: # %entry +; CHECK-NEXT: .LBB42_12: # %entry ; CHECK-NEXT: bge a7, a2, .LBB42_30 -; CHECK-NEXT: .LBB42_18: # %entry +; CHECK-NEXT: .LBB42_13: # %entry ; CHECK-NEXT: bge a7, a1, .LBB42_31 -; CHECK-NEXT: .LBB42_19: # %entry -; CHECK-NEXT: bge a7, s7, .LBB42_32 -; CHECK-NEXT: .LBB42_20: # %entry -; CHECK-NEXT: blt a7, a0, .LBB42_22 -; CHECK-NEXT: .LBB42_21: # %entry +; CHECK-NEXT: .LBB42_14: # %entry +; CHECK-NEXT: bge a7, s2, .LBB42_32 +; CHECK-NEXT: .LBB42_15: # %entry +; CHECK-NEXT: blt a7, a0, 
.LBB42_17 +; CHECK-NEXT: .LBB42_16: # %entry ; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: .LBB42_22: # %entry +; CHECK-NEXT: .LBB42_17: # %entry ; CHECK-NEXT: sh a0, 14(s0) -; CHECK-NEXT: sh s7, 12(s0) +; CHECK-NEXT: sh s2, 12(s0) ; CHECK-NEXT: sh a1, 10(s0) ; CHECK-NEXT: sh a2, 8(s0) ; CHECK-NEXT: sh a3, 6(s0) ; CHECK-NEXT: sh a4, 4(s0) ; CHECK-NEXT: sh a5, 2(s0) ; CHECK-NEXT: sh a6, 0(s0) -; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 96 +; CHECK-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 104(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s2, 96(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s3, 88(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s4, 80(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s5, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s6, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s7, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs1, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs2, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs3, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs4, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs5, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs6, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 128 ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB42_18: # %entry +; CHECK-NEXT: mv a0, a7 +; CHECK-NEXT: fcvt.l.s a1, fs5, rtz +; CHECK-NEXT: blt s2, a7, .LBB42_2 +; CHECK-NEXT: .LBB42_19: # %entry +; CHECK-NEXT: mv s2, a7 +; CHECK-NEXT: fcvt.l.s a2, fs4, rtz +; CHECK-NEXT: blt a1, a7, .LBB42_3 +; CHECK-NEXT: .LBB42_20: # %entry +; CHECK-NEXT: mv a1, a7 +; CHECK-NEXT: fcvt.l.s a3, fs3, rtz +; CHECK-NEXT: blt a2, a7, .LBB42_4 +; CHECK-NEXT: .LBB42_21: # %entry +; CHECK-NEXT: mv a2, a7 +; CHECK-NEXT: fcvt.l.s a4, fs2, rtz +; CHECK-NEXT: blt a3, a7, .LBB42_5 +; CHECK-NEXT: .LBB42_22: # %entry +; CHECK-NEXT: mv a3, a7 +; CHECK-NEXT: fcvt.l.s a5, fs1, rtz +; CHECK-NEXT: blt a4, a7, .LBB42_6 ; CHECK-NEXT: .LBB42_23: # %entry ; CHECK-NEXT: mv a4, a7 -; CHECK-NEXT: fcvt.l.s a6, ft1, rtz -; CHECK-NEXT: blt a5, a7, .LBB42_12 +; CHECK-NEXT: fcvt.l.s a6, fs0, rtz +; CHECK-NEXT: blt a5, a7, .LBB42_7 ; CHECK-NEXT: .LBB42_24: # %entry ; CHECK-NEXT: mv a5, a7 -; CHECK-NEXT: blt a6, a7, .LBB42_13 +; CHECK-NEXT: blt a6, a7, .LBB42_8 ; CHECK-NEXT: .LBB42_25: # %entry ; CHECK-NEXT: mv a6, a7 ; CHECK-NEXT: lui a7, 1048568 -; CHECK-NEXT: blt a7, a6, .LBB42_14 +; CHECK-NEXT: blt a7, a6, .LBB42_9 ; CHECK-NEXT: .LBB42_26: # %entry ; CHECK-NEXT: lui a6, 1048568 -; CHECK-NEXT: blt a7, a5, .LBB42_15 +; CHECK-NEXT: blt a7, a5, .LBB42_10 ; CHECK-NEXT: .LBB42_27: # %entry ; CHECK-NEXT: lui a5, 1048568 -; CHECK-NEXT: blt a7, a4, .LBB42_16 +; CHECK-NEXT: blt a7, a4, .LBB42_11 ; CHECK-NEXT: .LBB42_28: # %entry ; CHECK-NEXT: lui a4, 1048568 -; CHECK-NEXT: blt a7, a3, .LBB42_17 +; CHECK-NEXT: blt a7, a3, .LBB42_12 ; CHECK-NEXT: .LBB42_29: # %entry ; CHECK-NEXT: lui a3, 1048568 -; CHECK-NEXT: blt a7, a2, .LBB42_18 +; CHECK-NEXT: blt a7, a2, 
.LBB42_13 ; CHECK-NEXT: .LBB42_30: # %entry ; CHECK-NEXT: lui a2, 1048568 -; CHECK-NEXT: blt a7, a1, .LBB42_19 +; CHECK-NEXT: blt a7, a1, .LBB42_14 ; CHECK-NEXT: .LBB42_31: # %entry ; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: blt a7, s7, .LBB42_20 +; CHECK-NEXT: blt a7, s2, .LBB42_15 ; CHECK-NEXT: .LBB42_32: # %entry -; CHECK-NEXT: lui s7, 1048568 -; CHECK-NEXT: bge a7, a0, .LBB42_21 -; CHECK-NEXT: j .LBB42_22 +; CHECK-NEXT: lui s2, 1048568 +; CHECK-NEXT: bge a7, a0, .LBB42_16 +; CHECK-NEXT: j .LBB42_17 entry: %conv = fptosi <8 x half> %x to <8 x i32> %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> ) @@ -3088,18 +3125,24 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: utesth_f16i16_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -96 -; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -128 +; CHECK-NEXT: .cfi_def_cfa_offset 128 +; CHECK-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 104(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 96(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s3, 88(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s4, 80(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s5, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s6, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s7, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs1, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs2, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs3, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs4, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs5, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs6, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -3109,48 +3152,49 @@ ; CHECK-NEXT: .cfi_offset s5, -56 ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 -; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: lhu s5, 0(a1) -; CHECK-NEXT: lhu s1, 56(a1) -; CHECK-NEXT: lhu s2, 48(a1) -; CHECK-NEXT: lhu s3, 40(a1) -; CHECK-NEXT: lhu s4, 32(a1) +; CHECK-NEXT: .cfi_offset fs0, -80 +; CHECK-NEXT: .cfi_offset fs1, -88 +; CHECK-NEXT: .cfi_offset fs2, -96 +; CHECK-NEXT: .cfi_offset fs3, -104 +; CHECK-NEXT: .cfi_offset fs4, -112 +; CHECK-NEXT: .cfi_offset fs5, -120 +; CHECK-NEXT: .cfi_offset fs6, -128 +; CHECK-NEXT: lhu s1, 0(a1) +; CHECK-NEXT: lhu s2, 56(a1) +; CHECK-NEXT: lhu s3, 48(a1) +; CHECK-NEXT: lhu s4, 40(a1) +; CHECK-NEXT: lhu s5, 32(a1) ; CHECK-NEXT: lhu s6, 24(a1) ; CHECK-NEXT: lhu s7, 16(a1) ; CHECK-NEXT: lhu a1, 8(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: fmv.s fs5, fa0 ; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s7, a0 +; CHECK-NEXT: fmv.s fs6, fa0 ; CHECK-NEXT: mv a0, s6 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s6, a0 +; CHECK-NEXT: fmv.s fs4, fa0 +; CHECK-NEXT: mv 
a0, s5 +; CHECK-NEXT: call __extendhfsf2@plt +; CHECK-NEXT: fmv.s fs3, fa0 ; CHECK-NEXT: mv a0, s4 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: fmv.s fs2, fa0 ; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: fmv.s fs1, fa0 ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: fmv.s fs0, fa0 +; CHECK-NEXT: fcvt.lu.s s3, fs6, rtz +; CHECK-NEXT: fcvt.lu.s a0, fs5, rtz +; CHECK-NEXT: sext.w s2, a0 ; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: fmv.w.x ft0, s6 -; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s7 -; CHECK-NEXT: fcvt.lu.s s7, ft0, rtz -; CHECK-NEXT: fmv.w.x ft0, s8 -; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz -; CHECK-NEXT: sext.w s6, a0 -; CHECK-NEXT: mv a0, s5 -; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: fcvt.lu.s a0, ft0, rtz +; CHECK-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-NEXT: sext.w a0, a0 ; CHECK-NEXT: lui a1, 16 ; CHECK-NEXT: addiw a1, a1, -1 @@ -3158,36 +3202,31 @@ ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB43_2: # %entry -; CHECK-NEXT: fmv.w.x ft0, s4 -; CHECK-NEXT: flw ft1, 12(sp) # 4-byte Folded Reload -; CHECK-NEXT: fcvt.lu.s a3, ft1, rtz -; CHECK-NEXT: sext.w a2, s7 -; CHECK-NEXT: bltu s6, a1, .LBB43_4 +; CHECK-NEXT: fcvt.lu.s a3, fs4, rtz +; CHECK-NEXT: sext.w a2, s3 +; CHECK-NEXT: bltu s2, a1, .LBB43_4 ; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s6, a1 +; CHECK-NEXT: mv s2, a1 ; CHECK-NEXT: .LBB43_4: # %entry -; CHECK-NEXT: fmv.w.x ft1, s3 -; CHECK-NEXT: fcvt.lu.s a4, ft0, rtz +; CHECK-NEXT: fcvt.lu.s a4, fs3, rtz ; CHECK-NEXT: sext.w a3, a3 ; CHECK-NEXT: bltu a2, a1, .LBB43_6 ; CHECK-NEXT: # %bb.5: # %entry ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB43_6: # %entry -; CHECK-NEXT: fmv.w.x ft0, s2 -; CHECK-NEXT: fcvt.lu.s a5, ft1, rtz +; CHECK-NEXT: fcvt.lu.s a5, fs2, rtz ; CHECK-NEXT: sext.w a4, a4 ; CHECK-NEXT: bltu a3, a1, .LBB43_8 ; CHECK-NEXT: # %bb.7: # %entry ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB43_8: # %entry -; CHECK-NEXT: fmv.w.x ft1, s1 -; CHECK-NEXT: fcvt.lu.s a6, ft0, rtz +; CHECK-NEXT: fcvt.lu.s a6, fs1, rtz ; CHECK-NEXT: sext.w a5, a5 ; CHECK-NEXT: bltu a4, a1, .LBB43_10 ; CHECK-NEXT: # %bb.9: # %entry ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB43_10: # %entry -; CHECK-NEXT: fcvt.lu.s a7, ft1, rtz +; CHECK-NEXT: fcvt.lu.s a7, fs0, rtz ; CHECK-NEXT: sext.w a6, a6 ; CHECK-NEXT: bgeu a5, a1, .LBB43_15 ; CHECK-NEXT: # %bb.11: # %entry @@ -3204,19 +3243,25 @@ ; CHECK-NEXT: sh a4, 8(s0) ; CHECK-NEXT: sh a3, 6(s0) ; CHECK-NEXT: sh a2, 4(s0) -; CHECK-NEXT: sh s6, 2(s0) +; CHECK-NEXT: sh s2, 2(s0) ; CHECK-NEXT: sh a0, 0(s0) -; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 96 +; CHECK-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 104(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s2, 96(sp) # 8-byte Folded Reload +; 
CHECK-NEXT: ld s3, 88(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s4, 80(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s5, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s6, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s7, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs1, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs2, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs3, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs4, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs5, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs6, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 128 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB43_15: # %entry ; CHECK-NEXT: mv a5, a1 @@ -3236,18 +3281,24 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: ustest_f16i16_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -96 -; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s0, 80(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 72(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 64(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s3, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s4, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s5, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s6, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s7, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s8, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -128 +; CHECK-NEXT: .cfi_def_cfa_offset 128 +; CHECK-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 112(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 104(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 96(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s3, 88(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s4, 80(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s5, 72(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s6, 64(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s7, 56(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 48(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs1, 40(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs2, 32(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs3, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs4, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs5, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs6, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 @@ -3257,155 +3308,167 @@ ; CHECK-NEXT: .cfi_offset s5, -56 ; CHECK-NEXT: .cfi_offset s6, -64 ; CHECK-NEXT: .cfi_offset s7, -72 -; CHECK-NEXT: .cfi_offset s8, -80 -; CHECK-NEXT: lhu s6, 56(a1) -; CHECK-NEXT: lhu s1, 0(a1) -; CHECK-NEXT: lhu s2, 8(a1) -; CHECK-NEXT: lhu s3, 16(a1) -; CHECK-NEXT: lhu s4, 24(a1) -; CHECK-NEXT: lhu s5, 32(a1) +; CHECK-NEXT: .cfi_offset fs0, -80 +; CHECK-NEXT: .cfi_offset fs1, -88 +; CHECK-NEXT: .cfi_offset fs2, -96 +; CHECK-NEXT: .cfi_offset fs3, -104 +; CHECK-NEXT: .cfi_offset fs4, -112 +; CHECK-NEXT: .cfi_offset fs5, -120 +; CHECK-NEXT: .cfi_offset fs6, -128 +; CHECK-NEXT: lhu s1, 56(a1) +; CHECK-NEXT: lhu s2, 0(a1) +; CHECK-NEXT: lhu s3, 8(a1) +; CHECK-NEXT: lhu s4, 16(a1) +; CHECK-NEXT: lhu s5, 24(a1) +; CHECK-NEXT: lhu s6, 32(a1) ; CHECK-NEXT: lhu s7, 40(a1) ; CHECK-NEXT: lhu a1, 48(a1) ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s8, a0 +; CHECK-NEXT: fmv.s fs6, fa0 ; CHECK-NEXT: mv a0, s7 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s7, a0 +; CHECK-NEXT: fmv.s fs5, fa0 +; 
CHECK-NEXT: mv a0, s6 +; CHECK-NEXT: call __extendhfsf2@plt +; CHECK-NEXT: fmv.s fs4, fa0 ; CHECK-NEXT: mv a0, s5 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s5, a0 +; CHECK-NEXT: fmv.s fs3, fa0 ; CHECK-NEXT: mv a0, s4 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s4, a0 +; CHECK-NEXT: fmv.s fs2, fa0 ; CHECK-NEXT: mv a0, s3 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s3, a0 +; CHECK-NEXT: fmv.s fs1, fa0 ; CHECK-NEXT: mv a0, s2 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: fmv.s fs0, fa0 +; CHECK-NEXT: fcvt.l.s s2, fs6, rtz ; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: fmv.w.x ft0, s7 -; CHECK-NEXT: fsw ft0, 12(sp) # 4-byte Folded Spill -; CHECK-NEXT: fmv.w.x ft0, s8 -; CHECK-NEXT: fcvt.l.s s7, ft0, rtz -; CHECK-NEXT: mv a0, s6 -; CHECK-NEXT: call __extendhfsf2@plt -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: fcvt.l.s a0, ft0, rtz +; CHECK-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NEXT: lui a1, 16 ; CHECK-NEXT: addiw a7, a1, -1 -; CHECK-NEXT: blt a0, a7, .LBB44_2 +; CHECK-NEXT: bge a0, a7, .LBB44_18 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: mv a0, a7 +; CHECK-NEXT: fcvt.l.s a1, fs5, rtz +; CHECK-NEXT: bge s2, a7, .LBB44_19 ; CHECK-NEXT: .LBB44_2: # %entry -; CHECK-NEXT: fmv.w.x ft1, s5 -; CHECK-NEXT: flw ft0, 12(sp) # 4-byte Folded Reload -; CHECK-NEXT: fcvt.l.s a1, ft0, rtz -; CHECK-NEXT: blt s7, a7, .LBB44_4 -; CHECK-NEXT: # %bb.3: # %entry -; CHECK-NEXT: mv s7, a7 +; CHECK-NEXT: fcvt.l.s a2, fs4, rtz +; CHECK-NEXT: bge a1, a7, .LBB44_20 +; CHECK-NEXT: .LBB44_3: # %entry +; CHECK-NEXT: fcvt.l.s a3, fs3, rtz +; CHECK-NEXT: bge a2, a7, .LBB44_21 ; CHECK-NEXT: .LBB44_4: # %entry -; CHECK-NEXT: fmv.w.x ft0, s4 -; CHECK-NEXT: fcvt.l.s a2, ft1, rtz -; CHECK-NEXT: blt a1, a7, .LBB44_6 -; CHECK-NEXT: # %bb.5: # %entry -; CHECK-NEXT: mv a1, a7 -; CHECK-NEXT: .LBB44_6: # %entry -; CHECK-NEXT: fmv.w.x ft1, s3 -; CHECK-NEXT: fcvt.l.s a3, ft0, rtz -; CHECK-NEXT: blt a2, a7, .LBB44_8 -; CHECK-NEXT: # %bb.7: # %entry -; CHECK-NEXT: mv a2, a7 -; CHECK-NEXT: .LBB44_8: # %entry -; CHECK-NEXT: fmv.w.x ft0, s2 -; CHECK-NEXT: fcvt.l.s a4, ft1, rtz -; CHECK-NEXT: blt a3, a7, .LBB44_10 -; CHECK-NEXT: # %bb.9: # %entry -; CHECK-NEXT: mv a3, a7 -; CHECK-NEXT: .LBB44_10: # %entry -; CHECK-NEXT: fmv.w.x ft1, s1 -; CHECK-NEXT: fcvt.l.s a5, ft0, rtz +; CHECK-NEXT: fcvt.l.s a4, fs2, rtz +; CHECK-NEXT: bge a3, a7, .LBB44_22 +; CHECK-NEXT: .LBB44_5: # %entry +; CHECK-NEXT: fcvt.l.s a5, fs1, rtz ; CHECK-NEXT: bge a4, a7, .LBB44_23 -; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: fcvt.l.s a6, ft1, rtz +; CHECK-NEXT: .LBB44_6: # %entry +; CHECK-NEXT: fcvt.l.s a6, fs0, rtz ; CHECK-NEXT: bge a5, a7, .LBB44_24 -; CHECK-NEXT: .LBB44_12: # %entry +; CHECK-NEXT: .LBB44_7: # %entry ; CHECK-NEXT: bge a6, a7, .LBB44_25 -; CHECK-NEXT: .LBB44_13: # %entry +; CHECK-NEXT: .LBB44_8: # %entry ; CHECK-NEXT: blez a6, .LBB44_26 -; CHECK-NEXT: .LBB44_14: # %entry +; CHECK-NEXT: .LBB44_9: # %entry ; CHECK-NEXT: blez a5, .LBB44_27 -; CHECK-NEXT: .LBB44_15: # %entry +; CHECK-NEXT: .LBB44_10: # %entry ; CHECK-NEXT: blez a4, .LBB44_28 -; CHECK-NEXT: .LBB44_16: # %entry +; CHECK-NEXT: .LBB44_11: # %entry ; CHECK-NEXT: blez a3, .LBB44_29 -; CHECK-NEXT: .LBB44_17: # %entry +; CHECK-NEXT: .LBB44_12: # %entry ; CHECK-NEXT: blez a2, .LBB44_30 -; CHECK-NEXT: .LBB44_18: # %entry +; CHECK-NEXT: .LBB44_13: # %entry ; CHECK-NEXT: blez a1, .LBB44_31 -; CHECK-NEXT: .LBB44_19: # %entry -; CHECK-NEXT: blez s7, .LBB44_32 -; CHECK-NEXT: .LBB44_20: 
# %entry -; CHECK-NEXT: bgtz a0, .LBB44_22 -; CHECK-NEXT: .LBB44_21: # %entry +; CHECK-NEXT: .LBB44_14: # %entry +; CHECK-NEXT: blez s2, .LBB44_32 +; CHECK-NEXT: .LBB44_15: # %entry +; CHECK-NEXT: bgtz a0, .LBB44_17 +; CHECK-NEXT: .LBB44_16: # %entry ; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: .LBB44_22: # %entry +; CHECK-NEXT: .LBB44_17: # %entry ; CHECK-NEXT: sh a0, 14(s0) -; CHECK-NEXT: sh s7, 12(s0) +; CHECK-NEXT: sh s2, 12(s0) ; CHECK-NEXT: sh a1, 10(s0) ; CHECK-NEXT: sh a2, 8(s0) ; CHECK-NEXT: sh a3, 6(s0) ; CHECK-NEXT: sh a4, 4(s0) ; CHECK-NEXT: sh a5, 2(s0) ; CHECK-NEXT: sh a6, 0(s0) -; CHECK-NEXT: ld ra, 88(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 80(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 72(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 64(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s3, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s4, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s5, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s6, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s7, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s8, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 96 +; CHECK-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 104(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s2, 96(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s3, 88(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s4, 80(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s5, 72(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s6, 64(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s7, 56(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs1, 40(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs2, 32(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs3, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs4, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs5, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs6, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 128 ; CHECK-NEXT: ret +; CHECK-NEXT: .LBB44_18: # %entry +; CHECK-NEXT: mv a0, a7 +; CHECK-NEXT: fcvt.l.s a1, fs5, rtz +; CHECK-NEXT: blt s2, a7, .LBB44_2 +; CHECK-NEXT: .LBB44_19: # %entry +; CHECK-NEXT: mv s2, a7 +; CHECK-NEXT: fcvt.l.s a2, fs4, rtz +; CHECK-NEXT: blt a1, a7, .LBB44_3 +; CHECK-NEXT: .LBB44_20: # %entry +; CHECK-NEXT: mv a1, a7 +; CHECK-NEXT: fcvt.l.s a3, fs3, rtz +; CHECK-NEXT: blt a2, a7, .LBB44_4 +; CHECK-NEXT: .LBB44_21: # %entry +; CHECK-NEXT: mv a2, a7 +; CHECK-NEXT: fcvt.l.s a4, fs2, rtz +; CHECK-NEXT: blt a3, a7, .LBB44_5 +; CHECK-NEXT: .LBB44_22: # %entry +; CHECK-NEXT: mv a3, a7 +; CHECK-NEXT: fcvt.l.s a5, fs1, rtz +; CHECK-NEXT: blt a4, a7, .LBB44_6 ; CHECK-NEXT: .LBB44_23: # %entry ; CHECK-NEXT: mv a4, a7 -; CHECK-NEXT: fcvt.l.s a6, ft1, rtz -; CHECK-NEXT: blt a5, a7, .LBB44_12 +; CHECK-NEXT: fcvt.l.s a6, fs0, rtz +; CHECK-NEXT: blt a5, a7, .LBB44_7 ; CHECK-NEXT: .LBB44_24: # %entry ; CHECK-NEXT: mv a5, a7 -; CHECK-NEXT: blt a6, a7, .LBB44_13 +; CHECK-NEXT: blt a6, a7, .LBB44_8 ; CHECK-NEXT: .LBB44_25: # %entry ; CHECK-NEXT: mv a6, a7 -; CHECK-NEXT: bgtz a6, .LBB44_14 +; CHECK-NEXT: bgtz a6, .LBB44_9 ; CHECK-NEXT: .LBB44_26: # %entry ; CHECK-NEXT: li a6, 0 -; CHECK-NEXT: bgtz a5, .LBB44_15 +; CHECK-NEXT: bgtz a5, .LBB44_10 ; CHECK-NEXT: .LBB44_27: # %entry ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: bgtz a4, .LBB44_16 +; CHECK-NEXT: bgtz a4, .LBB44_11 ; CHECK-NEXT: .LBB44_28: # %entry ; CHECK-NEXT: li a4, 0 -; CHECK-NEXT: bgtz a3, .LBB44_17 +; CHECK-NEXT: bgtz a3, .LBB44_12 ; 
CHECK-NEXT: .LBB44_29: # %entry ; CHECK-NEXT: li a3, 0 -; CHECK-NEXT: bgtz a2, .LBB44_18 +; CHECK-NEXT: bgtz a2, .LBB44_13 ; CHECK-NEXT: .LBB44_30: # %entry ; CHECK-NEXT: li a2, 0 -; CHECK-NEXT: bgtz a1, .LBB44_19 +; CHECK-NEXT: bgtz a1, .LBB44_14 ; CHECK-NEXT: .LBB44_31: # %entry ; CHECK-NEXT: li a1, 0 -; CHECK-NEXT: bgtz s7, .LBB44_20 +; CHECK-NEXT: bgtz s2, .LBB44_15 ; CHECK-NEXT: .LBB44_32: # %entry -; CHECK-NEXT: li s7, 0 -; CHECK-NEXT: blez a0, .LBB44_21 -; CHECK-NEXT: j .LBB44_22 +; CHECK-NEXT: li s2, 0 +; CHECK-NEXT: blez a0, .LBB44_16 +; CHECK-NEXT: j .LBB44_17 entry: %conv = fptosi <8 x half> %x to <8 x i32> %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> ) @@ -3424,16 +3487,16 @@ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 -; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s2, a1 +; CHECK-NEXT: .cfi_offset fs0, -32 +; CHECK-NEXT: fmv.d fs0, fa1 ; CHECK-NEXT: call __fixdfti@plt ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: fmv.d fa0, fs0 ; CHECK-NEXT: call __fixdfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: li a0, -1 @@ -3480,7 +3543,7 @@ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB45_17: # %entry @@ -3539,17 +3602,17 @@ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 -; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .cfi_offset fs0, -32 +; CHECK-NEXT: fmv.d fs0, fa0 +; CHECK-NEXT: fmv.d fa0, fa1 ; CHECK-NEXT: call __fixunsdfti@plt ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: fmv.d fa0, fs0 ; CHECK-NEXT: call __fixunsdfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: mv a3, a1 @@ -3571,7 +3634,7 @@ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB46_7: # %entry @@ -3596,16 +3659,16 @@ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 -; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s2, a1 +; CHECK-NEXT: .cfi_offset fs0, -32 +; CHECK-NEXT: fmv.d fs0, fa1 ; CHECK-NEXT: call __fixdfti@plt ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: fmv.d 
fa0, fs0 ; CHECK-NEXT: call __fixdfti@plt ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: li a5, 1 @@ -3640,7 +3703,7 @@ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB47_12: # %entry @@ -3692,16 +3755,16 @@ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 -; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s2, a1 +; CHECK-NEXT: .cfi_offset fs0, -32 +; CHECK-NEXT: fmv.s fs0, fa1 ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: fmv.s fa0, fs0 ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: li a0, -1 @@ -3748,7 +3811,7 @@ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB48_17: # %entry @@ -3807,17 +3870,17 @@ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 -; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s2, a0 -; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .cfi_offset fs0, -32 +; CHECK-NEXT: fmv.s fs0, fa0 +; CHECK-NEXT: fmv.s fa0, fa1 ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: fmv.s fa0, fs0 ; CHECK-NEXT: call __fixunssfti@plt ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: mv a3, a1 @@ -3839,7 +3902,7 @@ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB49_7: # %entry @@ -3864,16 +3927,16 @@ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: .cfi_offset s1, -24 -; CHECK-NEXT: .cfi_offset s2, -32 -; CHECK-NEXT: mv s2, a1 +; CHECK-NEXT: .cfi_offset fs0, -32 +; CHECK-NEXT: fmv.s fs0, fa1 ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv s0, a0 ; CHECK-NEXT: mv s1, a1 -; CHECK-NEXT: mv a0, s2 +; CHECK-NEXT: fmv.s fa0, fs0 ; CHECK-NEXT: call __fixsfti@plt ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: li a5, 1 @@ -3908,7 +3971,7 @@ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded 
Reload -; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB50_12: # %entry diff --git a/llvm/test/CodeGen/RISCV/frm-dependency.ll b/llvm/test/CodeGen/RISCV/frm-dependency.ll --- a/llvm/test/CodeGen/RISCV/frm-dependency.ll +++ b/llvm/test/CodeGen/RISCV/frm-dependency.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+f,+d -stop-after=finalize-isel < %s \ -; RUN: | FileCheck -check-prefixes=RV32IF %s +; RUN: -target-abi=ilp32d | FileCheck -check-prefixes=RV32IF %s ; RUN: llc -mtriple=riscv64 -mattr=+f,+d -stop-after=finalize-isel < %s \ -; RUN: | FileCheck -check-prefixes=RV64IF %s +; RUN: -target-abi=lp64d | FileCheck -check-prefixes=RV64IF %s ; Make sure an implicit FRM dependency is added to instructions with dynamic ; rounding. @@ -10,28 +10,22 @@ define float @fadd_s(float %a, float %b) nounwind { ; RV32IF-LABEL: name: fadd_s ; RV32IF: bb.0 (%ir-block.0): - ; RV32IF-NEXT: liveins: $x10, $x11 + ; RV32IF-NEXT: liveins: $f10_f, $f11_f ; RV32IF-NEXT: {{ $}} - ; RV32IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11 - ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10 - ; RV32IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] - ; RV32IF-NEXT: [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]] - ; RV32IF-NEXT: %4:fpr32 = nofpexcept FADD_S killed [[FMV_W_X1]], killed [[FMV_W_X]], 7, implicit $frm - ; RV32IF-NEXT: [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %4 - ; RV32IF-NEXT: $x10 = COPY [[FMV_X_W]] - ; RV32IF-NEXT: PseudoRET implicit $x10 + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f11_f + ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f10_f + ; RV32IF-NEXT: %2:fpr32 = nofpexcept FADD_S [[COPY1]], [[COPY]], 7, implicit $frm + ; RV32IF-NEXT: $f10_f = COPY %2 + ; RV32IF-NEXT: PseudoRET implicit $f10_f ; RV64IF-LABEL: name: fadd_s ; RV64IF: bb.0 (%ir-block.0): - ; RV64IF-NEXT: liveins: $x10, $x11 + ; RV64IF-NEXT: liveins: $f10_f, $f11_f ; RV64IF-NEXT: {{ $}} - ; RV64IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11 - ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10 - ; RV64IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] - ; RV64IF-NEXT: [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]] - ; RV64IF-NEXT: %4:fpr32 = nofpexcept FADD_S killed [[FMV_W_X1]], killed [[FMV_W_X]], 7, implicit $frm - ; RV64IF-NEXT: [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %4 - ; RV64IF-NEXT: $x10 = COPY [[FMV_X_W]] - ; RV64IF-NEXT: PseudoRET implicit $x10 + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f11_f + ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f10_f + ; RV64IF-NEXT: %2:fpr32 = nofpexcept FADD_S [[COPY1]], [[COPY]], 7, implicit $frm + ; RV64IF-NEXT: $f10_f = COPY %2 + ; RV64IF-NEXT: PseudoRET implicit $f10_f %1 = fadd float %a, %b ret float %1 } @@ -41,32 +35,24 @@ define float @fmadd_s(float %a, float %b, float %c) nounwind { ; RV32IF-LABEL: name: fmadd_s ; RV32IF: bb.0 (%ir-block.0): - ; RV32IF-NEXT: liveins: $x10, $x11, $x12 + ; RV32IF-NEXT: liveins: $f10_f, $f11_f, $f12_f ; RV32IF-NEXT: {{ $}} - ; RV32IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x12 - ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; RV32IF-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 - ; RV32IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] - ; RV32IF-NEXT: [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]] - ; RV32IF-NEXT: [[FMV_W_X2:%[0-9]+]]:fpr32 = FMV_W_X [[COPY2]] - ; RV32IF-NEXT: %6:fpr32 = nofpexcept FMADD_S killed [[FMV_W_X2]], killed [[FMV_W_X1]], killed 
[[FMV_W_X]], 7, implicit $frm - ; RV32IF-NEXT: [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %6 - ; RV32IF-NEXT: $x10 = COPY [[FMV_X_W]] - ; RV32IF-NEXT: PseudoRET implicit $x10 + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f12_f + ; RV32IF-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f + ; RV32IF-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $f10_f + ; RV32IF-NEXT: %3:fpr32 = nofpexcept FMADD_S [[COPY2]], [[COPY1]], [[COPY]], 7, implicit $frm + ; RV32IF-NEXT: $f10_f = COPY %3 + ; RV32IF-NEXT: PseudoRET implicit $f10_f ; RV64IF-LABEL: name: fmadd_s ; RV64IF: bb.0 (%ir-block.0): - ; RV64IF-NEXT: liveins: $x10, $x11, $x12 + ; RV64IF-NEXT: liveins: $f10_f, $f11_f, $f12_f ; RV64IF-NEXT: {{ $}} - ; RV64IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x12 - ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 - ; RV64IF-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 - ; RV64IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] - ; RV64IF-NEXT: [[FMV_W_X1:%[0-9]+]]:fpr32 = FMV_W_X [[COPY1]] - ; RV64IF-NEXT: [[FMV_W_X2:%[0-9]+]]:fpr32 = FMV_W_X [[COPY2]] - ; RV64IF-NEXT: %6:fpr32 = nofpexcept FMADD_S killed [[FMV_W_X2]], killed [[FMV_W_X1]], killed [[FMV_W_X]], 7, implicit $frm - ; RV64IF-NEXT: [[FMV_X_W:%[0-9]+]]:gpr = FMV_X_W killed %6 - ; RV64IF-NEXT: $x10 = COPY [[FMV_X_W]] - ; RV64IF-NEXT: PseudoRET implicit $x10 + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f12_f + ; RV64IF-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY $f11_f + ; RV64IF-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $f10_f + ; RV64IF-NEXT: %3:fpr32 = nofpexcept FMADD_S [[COPY2]], [[COPY1]], [[COPY]], 7, implicit $frm + ; RV64IF-NEXT: $f10_f = COPY %3 + ; RV64IF-NEXT: PseudoRET implicit $f10_f %1 = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %1 } @@ -75,21 +61,19 @@ define i32 @fcvt_w_s(float %a) nounwind { ; RV32IF-LABEL: name: fcvt_w_s ; RV32IF: bb.0 (%ir-block.0): - ; RV32IF-NEXT: liveins: $x10 + ; RV32IF-NEXT: liveins: $f10_f ; RV32IF-NEXT: {{ $}} - ; RV32IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; RV32IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] - ; RV32IF-NEXT: %2:gpr = nofpexcept FCVT_W_S killed [[FMV_W_X]], 1 - ; RV32IF-NEXT: $x10 = COPY %2 + ; RV32IF-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f + ; RV32IF-NEXT: %1:gpr = nofpexcept FCVT_W_S [[COPY]], 1 + ; RV32IF-NEXT: $x10 = COPY %1 ; RV32IF-NEXT: PseudoRET implicit $x10 ; RV64IF-LABEL: name: fcvt_w_s ; RV64IF: bb.0 (%ir-block.0): - ; RV64IF-NEXT: liveins: $x10 + ; RV64IF-NEXT: liveins: $f10_f ; RV64IF-NEXT: {{ $}} - ; RV64IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; RV64IF-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]] - ; RV64IF-NEXT: %2:gpr = nofpexcept FCVT_W_S killed [[FMV_W_X]], 1 - ; RV64IF-NEXT: $x10 = COPY %2 + ; RV64IF-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $f10_f + ; RV64IF-NEXT: %1:gpr = nofpexcept FCVT_W_S [[COPY]], 1 + ; RV64IF-NEXT: $x10 = COPY %1 ; RV64IF-NEXT: PseudoRET implicit $x10 %1 = fptosi float %a to i32 ret i32 %1 @@ -104,21 +88,16 @@ ; RV32IF-NEXT: {{ $}} ; RV32IF-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 ; RV32IF-NEXT: %1:fpr64 = nofpexcept FCVT_D_W [[COPY]] - ; RV32IF-NEXT: FSD killed %1, %stack.0, 0 :: (store (s64) into %stack.0) - ; RV32IF-NEXT: [[LW:%[0-9]+]]:gpr = LW %stack.0, 0 :: (load (s32) from %stack.0, align 8) - ; RV32IF-NEXT: [[LW1:%[0-9]+]]:gpr = LW %stack.0, 4 :: (load (s32) from %stack.0 + 4, basealign 8) - ; RV32IF-NEXT: $x10 = COPY [[LW]] - ; RV32IF-NEXT: $x11 = COPY [[LW1]] - ; RV32IF-NEXT: PseudoRET implicit $x10, implicit $x11 + ; RV32IF-NEXT: $f10_d = COPY %1 + ; RV32IF-NEXT: PseudoRET implicit $f10_d ; RV64IF-LABEL: name: fcvt_d_w ; RV64IF: 
diff --git a/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll
--- a/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll
+++ b/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
-; RUN:   -disable-strictnode-mutation | FileCheck %s -check-prefix=RV64ID
+; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs -target-abi=lp64d \
+; RUN:   -disable-strictnode-mutation < %s | FileCheck %s -check-prefix=RV64ID
 
 ; This file exhaustively checks double<->i32 conversions. In general,
 ; fcvt.l[u].d can be selected instead of fcvt.w[u].d because poison is
@@ -10,8 +10,7 @@
 define i32 @aext_fptosi(double %a) nounwind strictfp {
 ; RV64ID-LABEL: aext_fptosi:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp
   ret i32 %1
@@ -21,8 +20,7 @@
 define signext i32 @sext_fptosi(double %a) nounwind strictfp {
 ; RV64ID-LABEL: sext_fptosi:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp
   ret i32 %1
@@ -31,8 +29,7 @@
 define zeroext i32 @zext_fptosi(double %a) nounwind strictfp {
 ; RV64ID-LABEL: zext_fptosi:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.w.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.w.d a0, fa0, rtz
 ; RV64ID-NEXT:    slli a0, a0, 32
 ; RV64ID-NEXT:    srli a0, a0, 32
 ; RV64ID-NEXT:    ret
@@ -43,8 +40,7 @@
 define i32 @aext_fptoui(double %a) nounwind strictfp {
 ; RV64ID-LABEL: aext_fptoui:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp
   ret i32 %1
@@ -54,8 +50,7 @@
 define signext i32 @sext_fptoui(double %a) nounwind strictfp {
 ; RV64ID-LABEL: sext_fptoui:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.wu.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp
   ret i32 %1
@@ -64,8 +59,7 @@
 define zeroext i32 @zext_fptoui(double %a) nounwind strictfp {
 ; RV64ID-LABEL: zext_fptoui:
 ; RV64ID:       # %bb.0:
-; RV64ID-NEXT:    fmv.d.x ft0, a0
-; RV64ID-NEXT:    fcvt.lu.d a0, ft0, rtz
+; RV64ID-NEXT:    fcvt.lu.d a0, fa0, rtz
 ; RV64ID-NEXT:    ret
   %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp
   ret i32 %1
@@ -74,8 +68,7 @@
 define double @uitofp_aext_i32_to_f64(i32 %a) nounwind strictfp {
 ; RV64ID-LABEL: uitofp_aext_i32_to_f64:
 ; RV64ID:       # %bb.0:
-;
RV64ID-NEXT: fcvt.d.wu ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.wu fa0, a0 ; RV64ID-NEXT: ret %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret double %1 @@ -85,8 +78,7 @@ define double @uitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp { ; RV64ID-LABEL: uitofp_sext_i32_to_f64: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fcvt.d.wu ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.wu fa0, a0 ; RV64ID-NEXT: ret %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret double %1 @@ -95,8 +87,7 @@ define double @uitofp_zext_i32_to_f64(i32 zeroext %a) nounwind strictfp { ; RV64ID-LABEL: uitofp_zext_i32_to_f64: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fcvt.d.wu ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.wu fa0, a0 ; RV64ID-NEXT: ret %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret double %1 @@ -105,8 +96,7 @@ define double @sitofp_aext_i32_to_f64(i32 %a) nounwind strictfp { ; RV64ID-LABEL: sitofp_aext_i32_to_f64: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fcvt.d.w ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.w fa0, a0 ; RV64ID-NEXT: ret %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret double %1 @@ -116,8 +106,7 @@ define double @sitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp { ; RV64ID-LABEL: sitofp_sext_i32_to_f64: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fcvt.d.w ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.w fa0, a0 ; RV64ID-NEXT: ret %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret double %1 @@ -126,8 +115,7 @@ define double @sitofp_zext_i32_to_f64(i32 zeroext %a) nounwind strictfp { ; RV64ID-LABEL: sitofp_zext_i32_to_f64: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fcvt.d.w ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.w fa0, a0 ; RV64ID-NEXT: ret %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret double %1 diff --git a/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll b/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll --- a/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64d-double-convert.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \ -; RUN: | FileCheck %s -check-prefix=RV64ID +; RUN: -target-abi=lp64d | FileCheck %s -check-prefix=RV64ID ; This file exhaustively checks double<->i32 conversions. 
In general, ; fcvt.l[u].d can be selected instead of fcvt.w[u].d because poison is @@ -10,8 +10,7 @@ define i32 @aext_fptosi(double %a) nounwind { ; RV64ID-LABEL: aext_fptosi: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fmv.d.x ft0, a0 -; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: fcvt.w.d a0, fa0, rtz ; RV64ID-NEXT: ret %1 = fptosi double %a to i32 ret i32 %1 @@ -20,8 +19,7 @@ define signext i32 @sext_fptosi(double %a) nounwind { ; RV64ID-LABEL: sext_fptosi: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fmv.d.x ft0, a0 -; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: fcvt.w.d a0, fa0, rtz ; RV64ID-NEXT: ret %1 = fptosi double %a to i32 ret i32 %1 @@ -30,8 +28,7 @@ define zeroext i32 @zext_fptosi(double %a) nounwind { ; RV64ID-LABEL: zext_fptosi: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fmv.d.x ft0, a0 -; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: fcvt.w.d a0, fa0, rtz ; RV64ID-NEXT: slli a0, a0, 32 ; RV64ID-NEXT: srli a0, a0, 32 ; RV64ID-NEXT: ret @@ -42,8 +39,7 @@ define i32 @aext_fptoui(double %a) nounwind { ; RV64ID-LABEL: aext_fptoui: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fmv.d.x ft0, a0 -; RV64ID-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64ID-NEXT: fcvt.wu.d a0, fa0, rtz ; RV64ID-NEXT: ret %1 = fptoui double %a to i32 ret i32 %1 @@ -52,8 +48,7 @@ define signext i32 @sext_fptoui(double %a) nounwind { ; RV64ID-LABEL: sext_fptoui: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fmv.d.x ft0, a0 -; RV64ID-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64ID-NEXT: fcvt.wu.d a0, fa0, rtz ; RV64ID-NEXT: ret %1 = fptoui double %a to i32 ret i32 %1 @@ -62,8 +57,7 @@ define zeroext i32 @zext_fptoui(double %a) nounwind { ; RV64ID-LABEL: zext_fptoui: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fmv.d.x ft0, a0 -; RV64ID-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64ID-NEXT: fcvt.lu.d a0, fa0, rtz ; RV64ID-NEXT: ret %1 = fptoui double %a to i32 ret i32 %1 @@ -72,8 +66,7 @@ define double @uitofp_aext_i32_to_f64(i32 %a) nounwind { ; RV64ID-LABEL: uitofp_aext_i32_to_f64: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fcvt.d.wu ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.wu fa0, a0 ; RV64ID-NEXT: ret %1 = uitofp i32 %a to double ret double %1 @@ -82,8 +75,7 @@ define double @uitofp_sext_i32_to_f64(i32 signext %a) nounwind { ; RV64ID-LABEL: uitofp_sext_i32_to_f64: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fcvt.d.wu ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.wu fa0, a0 ; RV64ID-NEXT: ret %1 = uitofp i32 %a to double ret double %1 @@ -92,8 +84,7 @@ define double @uitofp_zext_i32_to_f64(i32 zeroext %a) nounwind { ; RV64ID-LABEL: uitofp_zext_i32_to_f64: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fcvt.d.wu ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.wu fa0, a0 ; RV64ID-NEXT: ret %1 = uitofp i32 %a to double ret double %1 @@ -102,8 +93,7 @@ define double @sitofp_aext_i32_to_f64(i32 %a) nounwind { ; RV64ID-LABEL: sitofp_aext_i32_to_f64: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fcvt.d.w ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.w fa0, a0 ; RV64ID-NEXT: ret %1 = sitofp i32 %a to double ret double %1 @@ -112,8 +102,7 @@ define double @sitofp_sext_i32_to_f64(i32 signext %a) nounwind { ; RV64ID-LABEL: sitofp_sext_i32_to_f64: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fcvt.d.w ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.w fa0, a0 ; RV64ID-NEXT: ret %1 = sitofp i32 %a to double ret double %1 @@ -122,8 +111,7 @@ define double @sitofp_zext_i32_to_f64(i32 zeroext %a) nounwind { ; RV64ID-LABEL: sitofp_zext_i32_to_f64: ; RV64ID: # %bb.0: -; RV64ID-NEXT: fcvt.d.w ft0, a0 -; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: fcvt.d.w 
fa0, a0 ; RV64ID-NEXT: ret %1 = sitofp i32 %a to double ret double %1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV32-FP -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV64-FP +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32-FP +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64-FP define i16 @bitcast_v1f16_i16(<1 x half> %a) { ; CHECK-LABEL: bitcast_v1f16_i16: @@ -16,8 +20,7 @@ ; CHECK-LABEL: bitcast_v1f16_f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 0, e16, mf4, ta, mu -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: fmv.x.h a0, ft0 +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %b = bitcast <1 x half> %a to half ret half %b @@ -44,35 +47,21 @@ } define float @bitcast_v2f16_f32(<2 x half> %a) { -; RV32-FP-LABEL: bitcast_v2f16_f32: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: vsetivli zero, 0, e32, mf2, ta, mu -; RV32-FP-NEXT: vmv.x.s a0, v8 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_v2f16_f32: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: vsetivli zero, 0, e32, mf2, ta, mu -; RV64-FP-NEXT: vfmv.f.s ft0, v8 -; RV64-FP-NEXT: fmv.x.w a0, ft0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_v2f16_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e32, mf2, ta, mu +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %b = bitcast <2 x half> %a to float ret float %b } define float @bitcast_v1f32_f32(<1 x float> %a) { -; RV32-FP-LABEL: bitcast_v1f32_f32: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: vsetivli zero, 0, e32, mf2, ta, mu -; RV32-FP-NEXT: vmv.x.s a0, v8 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_v1f32_f32: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: vsetivli zero, 0, e32, mf2, ta, mu -; RV64-FP-NEXT: vfmv.f.s ft0, v8 -; RV64-FP-NEXT: fmv.x.w a0, ft0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_v1f32_f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e32, mf2, ta, mu +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %b = bitcast <1 x float> %a to float ret float %b } @@ -135,67 +124,31 @@ } define double @bitcast_v4f16_f64(<4 x half> %a) { -; RV32-FP-LABEL: bitcast_v4f16_f64: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: addi sp, sp, -16 -; RV32-FP-NEXT: .cfi_def_cfa_offset 16 -; RV32-FP-NEXT: vsetivli zero, 0, e64, m1, ta, mu -; RV32-FP-NEXT: vfmv.f.s ft0, v8 -; RV32-FP-NEXT: fsd ft0, 8(sp) -; RV32-FP-NEXT: lw a0, 8(sp) -; RV32-FP-NEXT: lw a1, 12(sp) -; RV32-FP-NEXT: addi sp, sp, 16 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_v4f16_f64: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: vsetivli zero, 0, e64, m1, ta, mu -; RV64-FP-NEXT: vmv.x.s a0, v8 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_v4f16_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, mu +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %b = bitcast <4 x half> %a to double ret double %b } define double 
@bitcast_v2f32_f64(<2 x float> %a) { -; RV32-FP-LABEL: bitcast_v2f32_f64: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: addi sp, sp, -16 -; RV32-FP-NEXT: .cfi_def_cfa_offset 16 -; RV32-FP-NEXT: vsetivli zero, 0, e64, m1, ta, mu -; RV32-FP-NEXT: vfmv.f.s ft0, v8 -; RV32-FP-NEXT: fsd ft0, 8(sp) -; RV32-FP-NEXT: lw a0, 8(sp) -; RV32-FP-NEXT: lw a1, 12(sp) -; RV32-FP-NEXT: addi sp, sp, 16 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_v2f32_f64: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: vsetivli zero, 0, e64, m1, ta, mu -; RV64-FP-NEXT: vmv.x.s a0, v8 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_v2f32_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, mu +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %b = bitcast <2 x float> %a to double ret double %b } define double @bitcast_v1f64_f64(<1 x double> %a) { -; RV32-FP-LABEL: bitcast_v1f64_f64: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: addi sp, sp, -16 -; RV32-FP-NEXT: .cfi_def_cfa_offset 16 -; RV32-FP-NEXT: vsetivli zero, 0, e64, m1, ta, mu -; RV32-FP-NEXT: vfmv.f.s ft0, v8 -; RV32-FP-NEXT: fsd ft0, 8(sp) -; RV32-FP-NEXT: lw a0, 8(sp) -; RV32-FP-NEXT: lw a1, 12(sp) -; RV32-FP-NEXT: addi sp, sp, 16 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_v1f64_f64: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: vsetivli zero, 0, e64, m1, ta, mu -; RV64-FP-NEXT: vmv.x.s a0, v8 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_v1f64_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, mu +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %b = bitcast <1 x double> %a to double ret double %b } @@ -305,9 +258,8 @@ define <1 x i16> @bitcast_f16_v1i16(half %a) { ; CHECK-LABEL: bitcast_f16_v1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: fmv.h.x ft0, a0 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu -; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret %b = bitcast half %a to <1 x i16> ret <1 x i16> %b @@ -316,210 +268,109 @@ define <1 x half> @bitcast_f16_v1f16(half %a) { ; CHECK-LABEL: bitcast_f16_v1f16: ; CHECK: # %bb.0: -; CHECK-NEXT: fmv.h.x ft0, a0 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu -; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret %b = bitcast half %a to <1 x half> ret <1 x half> %b } define <2 x i16> @bitcast_f32_v2i16(float %a) { -; RV32-FP-LABEL: bitcast_f32_v2i16: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV32-FP-NEXT: vmv.s.x v8, a0 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_f32_v2i16: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: fmv.w.x ft0, a0 -; RV64-FP-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV64-FP-NEXT: vfmv.s.f v8, ft0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_f32_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret %b = bitcast float %a to <2 x i16> ret <2 x i16> %b } define <2 x half> @bitcast_f32_v2f16(float %a) { -; RV32-FP-LABEL: bitcast_f32_v2f16: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV32-FP-NEXT: vmv.s.x v8, a0 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_f32_v2f16: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: fmv.w.x ft0, a0 -; RV64-FP-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV64-FP-NEXT: vfmv.s.f v8, ft0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_f32_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret %b = bitcast float %a to <2 x half> ret <2 x half> %b } define <1 x i32> @bitcast_f32_v1i32(float %a) { -; RV32-FP-LABEL: 
bitcast_f32_v1i32: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV32-FP-NEXT: vmv.s.x v8, a0 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_f32_v1i32: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: fmv.w.x ft0, a0 -; RV64-FP-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV64-FP-NEXT: vfmv.s.f v8, ft0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_f32_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret %b = bitcast float %a to <1 x i32> ret <1 x i32> %b } define <1 x float> @bitcast_f32_v1f32(float %a) { -; RV32-FP-LABEL: bitcast_f32_v1f32: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV32-FP-NEXT: vmv.s.x v8, a0 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_f32_v1f32: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: fmv.w.x ft0, a0 -; RV64-FP-NEXT: vsetivli zero, 1, e32, mf2, ta, mu -; RV64-FP-NEXT: vfmv.s.f v8, ft0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_f32_v1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret %b = bitcast float %a to <1 x float> ret <1 x float> %b } define <4 x i16> @bitcast_f64_v4i16(double %a) { -; RV32-FP-LABEL: bitcast_f64_v4i16: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: addi sp, sp, -16 -; RV32-FP-NEXT: .cfi_def_cfa_offset 16 -; RV32-FP-NEXT: sw a0, 8(sp) -; RV32-FP-NEXT: sw a1, 12(sp) -; RV32-FP-NEXT: fld ft0, 8(sp) -; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-FP-NEXT: vfmv.s.f v8, ft0 -; RV32-FP-NEXT: addi sp, sp, 16 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_f64_v4i16: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV64-FP-NEXT: vmv.s.x v8, a0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_f64_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret %b = bitcast double %a to <4 x i16> ret <4 x i16> %b } define <4 x half> @bitcast_f64_v4f16(double %a) { -; RV32-FP-LABEL: bitcast_f64_v4f16: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: addi sp, sp, -16 -; RV32-FP-NEXT: .cfi_def_cfa_offset 16 -; RV32-FP-NEXT: sw a0, 8(sp) -; RV32-FP-NEXT: sw a1, 12(sp) -; RV32-FP-NEXT: fld ft0, 8(sp) -; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-FP-NEXT: vfmv.s.f v8, ft0 -; RV32-FP-NEXT: addi sp, sp, 16 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_f64_v4f16: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV64-FP-NEXT: vmv.s.x v8, a0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_f64_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret %b = bitcast double %a to <4 x half> ret <4 x half> %b } define <2 x i32> @bitcast_f64_v2i32(double %a) { -; RV32-FP-LABEL: bitcast_f64_v2i32: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: addi sp, sp, -16 -; RV32-FP-NEXT: .cfi_def_cfa_offset 16 -; RV32-FP-NEXT: sw a0, 8(sp) -; RV32-FP-NEXT: sw a1, 12(sp) -; RV32-FP-NEXT: fld ft0, 8(sp) -; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-FP-NEXT: vfmv.s.f v8, ft0 -; RV32-FP-NEXT: addi sp, sp, 16 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_f64_v2i32: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV64-FP-NEXT: vmv.s.x v8, a0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_f64_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret %b = bitcast double %a to <2 x i32> ret <2 x i32> %b } define 
<2 x float> @bitcast_f64_v2f32(double %a) { -; RV32-FP-LABEL: bitcast_f64_v2f32: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: addi sp, sp, -16 -; RV32-FP-NEXT: .cfi_def_cfa_offset 16 -; RV32-FP-NEXT: sw a0, 8(sp) -; RV32-FP-NEXT: sw a1, 12(sp) -; RV32-FP-NEXT: fld ft0, 8(sp) -; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-FP-NEXT: vfmv.s.f v8, ft0 -; RV32-FP-NEXT: addi sp, sp, 16 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_f64_v2f32: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV64-FP-NEXT: vmv.s.x v8, a0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_f64_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret %b = bitcast double %a to <2 x float> ret <2 x float> %b } define <1 x i64> @bitcast_f64_v1i64(double %a) { -; RV32-FP-LABEL: bitcast_f64_v1i64: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: addi sp, sp, -16 -; RV32-FP-NEXT: .cfi_def_cfa_offset 16 -; RV32-FP-NEXT: sw a0, 8(sp) -; RV32-FP-NEXT: sw a1, 12(sp) -; RV32-FP-NEXT: fld ft0, 8(sp) -; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-FP-NEXT: vfmv.s.f v8, ft0 -; RV32-FP-NEXT: addi sp, sp, 16 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_f64_v1i64: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV64-FP-NEXT: vmv.s.x v8, a0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_f64_v1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret %b = bitcast double %a to <1 x i64> ret <1 x i64> %b } define <1 x double> @bitcast_f64_v1f64(double %a) { -; RV32-FP-LABEL: bitcast_f64_v1f64: -; RV32-FP: # %bb.0: -; RV32-FP-NEXT: addi sp, sp, -16 -; RV32-FP-NEXT: .cfi_def_cfa_offset 16 -; RV32-FP-NEXT: sw a0, 8(sp) -; RV32-FP-NEXT: sw a1, 12(sp) -; RV32-FP-NEXT: fld ft0, 8(sp) -; RV32-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-FP-NEXT: vfmv.s.f v8, ft0 -; RV32-FP-NEXT: addi sp, sp, 16 -; RV32-FP-NEXT: ret -; -; RV64-FP-LABEL: bitcast_f64_v1f64: -; RV64-FP: # %bb.0: -; RV64-FP-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV64-FP-NEXT: vmv.s.x v8, a0 -; RV64-FP-NEXT: ret +; CHECK-LABEL: bitcast_f64_v1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: ret %b = bitcast double %a to <1 x double> ret <1 x double> %b } diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+f \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+f -target-abi=lp64f \ ; RUN: -riscv-v-vector-bits-min=128 | FileCheck %s define void @sink_splat_mul(i32* nocapture %a, i32 signext %x) { @@ -1295,13 +1295,12 @@ define void @sink_splat_fmul(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fmul: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB20_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a0, a0, 16 @@ -1332,13 +1331,12 @@ define void @sink_splat_fdiv(float* nocapture %a, float %x) { ; 
CHECK-LABEL: sink_splat_fdiv: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB21_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a0, a0, 16 @@ -1369,13 +1367,12 @@ define void @sink_splat_frdiv(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frdiv: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB22_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a0, a0, 16 @@ -1406,13 +1403,12 @@ define void @sink_splat_fadd(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fadd: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB23_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a0, a0, 16 @@ -1443,13 +1439,12 @@ define void @sink_splat_fsub(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fsub: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB24_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a0, a0, 16 @@ -1480,13 +1475,12 @@ define void @sink_splat_frsub(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frsub: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: .LBB25_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a0, a0, 16 @@ -1517,43 +1511,42 @@ define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fmul_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a3, a2, 2 -; CHECK-NEXT: li a6, 1024 -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a6, a3, .LBB26_2 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB26_5 ; CHECK-NEXT: .LBB26_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a4, a6, a3 -; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB26_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0 +; CHECK-NEXT: 
vfmul.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) -; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a6, a6, a2 -; CHECK-NEXT: bne a5, a1, .LBB26_3 +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a6, a6, a1 +; CHECK-NEXT: bne a5, a3, .LBB26_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB26_7 ; CHECK-NEXT: .LBB26_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: addi a1, a3, -1024 +; CHECK-NEXT: slli a2, a3, 2 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB26_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft1, 0(a0) -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: fmul.s ft1, ft1, ft0 -; CHECK-NEXT: fsw ft1, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: fmul.s ft0, ft0, fa0 +; CHECK-NEXT: fsw ft0, 0(a0) +; CHECK-NEXT: addi a1, a1, 1 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bgeu a2, a1, .LBB26_6 +; CHECK-NEXT: bgeu a1, a2, .LBB26_6 ; CHECK-NEXT: .LBB26_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1610,43 +1603,42 @@ define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a3, a2, 2 -; CHECK-NEXT: li a6, 1024 -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a6, a3, .LBB27_2 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB27_5 ; CHECK-NEXT: .LBB27_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a4, a6, a3 -; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB27_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0 +; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) -; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a6, a6, a2 -; CHECK-NEXT: bne a5, a1, .LBB27_3 +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a6, a6, a1 +; CHECK-NEXT: bne a5, a3, .LBB27_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB27_7 ; CHECK-NEXT: .LBB27_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: addi a1, a3, -1024 +; CHECK-NEXT: slli a2, a3, 2 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB27_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft1, 0(a0) -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: fdiv.s ft1, ft1, ft0 -; CHECK-NEXT: fsw ft1, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: fdiv.s ft0, ft0, fa0 +; CHECK-NEXT: fsw ft0, 0(a0) +; CHECK-NEXT: addi a1, a1, 1 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bgeu a2, a1, .LBB27_6 +; CHECK-NEXT: bgeu a1, a2, .LBB27_6 ; CHECK-NEXT: .LBB27_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1703,43 +1695,42 @@ define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a3, a2, 2 -; CHECK-NEXT: li a6, 1024 -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a6, a3, .LBB28_2 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, 
a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB28_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB28_5 ; CHECK-NEXT: .LBB28_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a4, a6, a3 -; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB28_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0 +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) -; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a6, a6, a2 -; CHECK-NEXT: bne a5, a1, .LBB28_3 +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a6, a6, a1 +; CHECK-NEXT: bne a5, a3, .LBB28_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB28_7 ; CHECK-NEXT: .LBB28_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: addi a1, a3, -1024 +; CHECK-NEXT: slli a2, a3, 2 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB28_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft1, 0(a0) -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: fdiv.s ft1, ft0, ft1 -; CHECK-NEXT: fsw ft1, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: fdiv.s ft0, fa0, ft0 +; CHECK-NEXT: fsw ft0, 0(a0) +; CHECK-NEXT: addi a1, a1, 1 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bgeu a2, a1, .LBB28_6 +; CHECK-NEXT: bgeu a1, a2, .LBB28_6 ; CHECK-NEXT: .LBB28_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1796,43 +1787,42 @@ define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fadd_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a3, a2, 2 -; CHECK-NEXT: li a6, 1024 -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a6, a3, .LBB29_2 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB29_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB29_5 ; CHECK-NEXT: .LBB29_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a4, a6, a3 -; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB29_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0 +; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) -; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a6, a6, a2 -; CHECK-NEXT: bne a5, a1, .LBB29_3 +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a6, a6, a1 +; CHECK-NEXT: bne a5, a3, .LBB29_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB29_7 ; CHECK-NEXT: .LBB29_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: addi a1, a3, -1024 +; CHECK-NEXT: slli a2, a3, 2 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB29_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft1, 0(a0) -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: fadd.s ft1, ft1, ft0 -; CHECK-NEXT: fsw ft1, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: fadd.s ft0, ft0, fa0 +; 
CHECK-NEXT: fsw ft0, 0(a0) +; CHECK-NEXT: addi a1, a1, 1 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bgeu a2, a1, .LBB29_6 +; CHECK-NEXT: bgeu a1, a2, .LBB29_6 ; CHECK-NEXT: .LBB29_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1889,43 +1879,42 @@ define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a3, a2, 2 -; CHECK-NEXT: li a6, 1024 -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a6, a3, .LBB30_2 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB30_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB30_5 ; CHECK-NEXT: .LBB30_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a4, a6, a3 -; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB30_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0 +; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) -; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a6, a6, a2 -; CHECK-NEXT: bne a5, a1, .LBB30_3 +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a6, a6, a1 +; CHECK-NEXT: bne a5, a3, .LBB30_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB30_7 ; CHECK-NEXT: .LBB30_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: addi a1, a3, -1024 +; CHECK-NEXT: slli a2, a3, 2 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB30_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft1, 0(a0) -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: fsub.s ft1, ft1, ft0 -; CHECK-NEXT: fsw ft1, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: fsub.s ft0, ft0, fa0 +; CHECK-NEXT: fsw ft0, 0(a0) +; CHECK-NEXT: addi a1, a1, 1 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bgeu a2, a1, .LBB30_6 +; CHECK-NEXT: bgeu a1, a2, .LBB30_6 ; CHECK-NEXT: .LBB30_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -1982,43 +1971,42 @@ define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a3, a2, 2 -; CHECK-NEXT: li a6, 1024 -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: bgeu a6, a3, .LBB31_2 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a2, a1, 2 +; CHECK-NEXT: li a3, 1024 +; CHECK-NEXT: bgeu a3, a2, .LBB31_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: j .LBB31_5 ; CHECK-NEXT: .LBB31_2: # %vector.ph ; CHECK-NEXT: li a5, 0 -; CHECK-NEXT: remu a4, a6, a3 -; CHECK-NEXT: sub a1, a6, a4 +; CHECK-NEXT: remu a4, a3, a2 +; CHECK-NEXT: sub a3, a3, a4 ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: .LBB31_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0 +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a6) -; CHECK-NEXT: add a5, a5, a3 -; CHECK-NEXT: add a6, a6, a2 -; CHECK-NEXT: bne a5, a1, .LBB31_3 +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a6, a6, a1 +; CHECK-NEXT: bne a5, a3, .LBB31_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; 
CHECK-NEXT: beqz a4, .LBB31_7 ; CHECK-NEXT: .LBB31_5: # %for.body.preheader -; CHECK-NEXT: addi a2, a1, -1024 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: addi a1, a3, -1024 +; CHECK-NEXT: slli a2, a3, 2 +; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: .LBB31_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft1, 0(a0) -; CHECK-NEXT: mv a1, a2 -; CHECK-NEXT: fsub.s ft1, ft0, ft1 -; CHECK-NEXT: fsw ft1, 0(a0) -; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: fsub.s ft0, fa0, ft0 +; CHECK-NEXT: fsw ft0, 0(a0) +; CHECK-NEXT: addi a1, a1, 1 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bgeu a2, a1, .LBB31_6 +; CHECK-NEXT: bgeu a1, a2, .LBB31_6 ; CHECK-NEXT: .LBB31_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -2075,14 +2063,13 @@ define void @sink_splat_fma(float* noalias nocapture %a, float* nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a2 ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB32_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vfmacc.vf v9, ft0, v8 +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 ; CHECK-NEXT: vse32.v v9, (a0) ; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a1, a1, 16 @@ -2117,14 +2104,13 @@ define void @sink_splat_fma_commute(float* noalias nocapture %a, float* nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_commute: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a2 ; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB33_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vfmacc.vf v9, ft0, v8 +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 ; CHECK-NEXT: vse32.v v9, (a0) ; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a1, a1, 16 @@ -2159,19 +2145,18 @@ define void @sink_splat_fma_scalable(float* noalias nocapture %a, float* noalias nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: srli a4, a3, 2 -; CHECK-NEXT: li t0, 1024 -; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: bgeu t0, a4, .LBB34_2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: bgeu a4, a3, .LBB34_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: j .LBB34_5 ; CHECK-NEXT: .LBB34_2: # %vector.ph ; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: li a7, 0 -; CHECK-NEXT: remu a5, t0, a4 -; CHECK-NEXT: sub a2, t0, a5 +; CHECK-NEXT: remu a5, a4, a3 +; CHECK-NEXT: sub a4, a4, a5 ; CHECK-NEXT: .LBB34_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add t0, a0, a6 @@ -2179,29 +2164,29 @@ ; CHECK-NEXT: add t1, a1, a6 ; CHECK-NEXT: vl1re32.v v9, (t1) ; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmacc.vf v9, ft0, v8 +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 ; CHECK-NEXT: vs1r.v v9, (t0) -; CHECK-NEXT: add a7, a7, a4 -; CHECK-NEXT: add a6, a6, a3 -; CHECK-NEXT: bne a7, a2, .LBB34_3 +; CHECK-NEXT: add a7, a7, a3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: bne a7, a4, .LBB34_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a5, .LBB34_7 ; CHECK-NEXT: .LBB34_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 -; CHECK-NEXT: slli a2, 
a2, 2 -; CHECK-NEXT: add a1, a1, a2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: addi a2, a4, -1024 +; CHECK-NEXT: slli a3, a4, 2 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB34_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft1, 0(a0) -; CHECK-NEXT: flw ft2, 0(a1) -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: fmadd.s ft1, ft1, ft0, ft2 -; CHECK-NEXT: fsw ft1, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 +; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: flw ft1, 0(a1) +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: fmadd.s ft0, ft0, fa0, ft1 +; CHECK-NEXT: fsw ft0, 0(a0) +; CHECK-NEXT: addi a2, a2, 1 ; CHECK-NEXT: addi a1, a1, 4 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bgeu a3, a2, .LBB34_6 +; CHECK-NEXT: bgeu a2, a3, .LBB34_6 ; CHECK-NEXT: .LBB34_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -2263,19 +2248,18 @@ define void @sink_splat_fma_commute_scalable(float* noalias nocapture %a, float* noalias nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_commute_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: srli a4, a3, 2 -; CHECK-NEXT: li t0, 1024 -; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: bgeu t0, a4, .LBB35_2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a4, 1024 +; CHECK-NEXT: bgeu a4, a3, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a4, 0 ; CHECK-NEXT: j .LBB35_5 ; CHECK-NEXT: .LBB35_2: # %vector.ph ; CHECK-NEXT: li a6, 0 ; CHECK-NEXT: li a7, 0 -; CHECK-NEXT: remu a5, t0, a4 -; CHECK-NEXT: sub a2, t0, a5 +; CHECK-NEXT: remu a5, a4, a3 +; CHECK-NEXT: sub a4, a4, a5 ; CHECK-NEXT: .LBB35_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add t0, a0, a6 @@ -2283,29 +2267,29 @@ ; CHECK-NEXT: add t1, a1, a6 ; CHECK-NEXT: vl1re32.v v9, (t1) ; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, mu -; CHECK-NEXT: vfmacc.vf v9, ft0, v8 +; CHECK-NEXT: vfmacc.vf v9, fa0, v8 ; CHECK-NEXT: vs1r.v v9, (t0) -; CHECK-NEXT: add a7, a7, a4 -; CHECK-NEXT: add a6, a6, a3 -; CHECK-NEXT: bne a7, a2, .LBB35_3 +; CHECK-NEXT: add a7, a7, a3 +; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: bne a7, a4, .LBB35_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a5, .LBB35_7 ; CHECK-NEXT: .LBB35_5: # %for.body.preheader -; CHECK-NEXT: addi a3, a2, -1024 -; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: add a1, a1, a2 -; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: addi a2, a4, -1024 +; CHECK-NEXT: slli a3, a4, 2 +; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: .LBB35_6: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: flw ft1, 0(a0) -; CHECK-NEXT: flw ft2, 0(a1) -; CHECK-NEXT: mv a2, a3 -; CHECK-NEXT: fmadd.s ft1, ft0, ft1, ft2 -; CHECK-NEXT: fsw ft1, 0(a0) -; CHECK-NEXT: addi a3, a3, 1 +; CHECK-NEXT: flw ft0, 0(a0) +; CHECK-NEXT: flw ft1, 0(a1) +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: fmadd.s ft0, fa0, ft0, ft1 +; CHECK-NEXT: fsw ft0, 0(a0) +; CHECK-NEXT: addi a2, a2, 1 ; CHECK-NEXT: addi a1, a1, 4 ; CHECK-NEXT: addi a0, a0, 4 -; CHECK-NEXT: bgeu a3, a2, .LBB35_6 +; CHECK-NEXT: bgeu a2, a3, .LBB35_6 ; CHECK-NEXT: .LBB35_7: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -2410,14 +2394,13 @@ define void @sink_splat_fcmp(float* nocapture %x, float %y) { ; CHECK-LABEL: sink_splat_fcmp: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 ; CHECK-NEXT: li a1, 1024 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB37_1: # %vector.body ; 
CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vmfeq.vf v0, v9, ft0 +; CHECK-NEXT: vmfeq.vf v0, v9, fa0 ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: addi a1, a1, -4 ; CHECK-NEXT: addi a0, a0, 16 @@ -3287,19 +3270,18 @@ define void @sink_splat_vp_fmul(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_fmul: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB54_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu -; CHECK-NEXT: vfmul.vf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vfmul.vf v8, v8, fa0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: bnez a1, .LBB54_1 +; CHECK-NEXT: bnez a2, .LBB54_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -3328,19 +3310,18 @@ define void @sink_splat_vp_fdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_fdiv: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB55_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu -; CHECK-NEXT: vfdiv.vf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vfdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: bnez a1, .LBB55_1 +; CHECK-NEXT: bnez a2, .LBB55_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -3367,19 +3348,18 @@ define void @sink_splat_vp_frdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_frdiv: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB56_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu -; CHECK-NEXT: vfrdiv.vf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vfrdiv.vf v8, v8, fa0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: bnez a1, .LBB56_1 +; CHECK-NEXT: bnez a2, .LBB56_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -3408,19 +3388,18 @@ define void @sink_splat_vp_fadd(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_fadd: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB57_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu -; CHECK-NEXT: vfadd.vf v8, v8, ft0, v0.t +; CHECK-NEXT: 
vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: bnez a1, .LBB57_1 +; CHECK-NEXT: bnez a2, .LBB57_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -3449,19 +3428,18 @@ define void @sink_splat_vp_fsub(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_fsub: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB58_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu -; CHECK-NEXT: vfsub.vf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: bnez a1, .LBB58_1 +; CHECK-NEXT: bnez a2, .LBB58_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -3490,19 +3468,18 @@ define void @sink_splat_vp_frsub(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_frsub: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: li a2, 1024 ; CHECK-NEXT: .LBB59_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu -; CHECK-NEXT: vfrsub.vf v8, v8, ft0, v0.t +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: addi a1, a1, -4 +; CHECK-NEXT: addi a2, a2, -4 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: bnez a1, .LBB59_1 +; CHECK-NEXT: bnez a2, .LBB59_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -3732,21 +3709,20 @@ define void @sink_splat_vp_fma(float* noalias nocapture %a, float* nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_fma: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: li a3, 1024 ; CHECK-NEXT: .LBB65_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) -; CHECK-NEXT: vsetvli zero, a3, e32, m1, tu, mu -; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t +; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu +; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: addi a2, a2, -4 +; CHECK-NEXT: addi a3, a3, -4 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: bnez a2, .LBB65_1 +; CHECK-NEXT: bnez a3, .LBB65_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -3776,21 +3752,20 @@ define void @sink_splat_vp_fma_commute(float* noalias nocapture %a, float* nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_fma_commute: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a2 -; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: 
li a3, 1024
 ; CHECK-NEXT: .LBB66_1: # %vector.body
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: vle32.v v9, (a1)
-; CHECK-NEXT: vsetvli zero, a3, e32, m1, tu, mu
-; CHECK-NEXT: vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a2, a2, -4
+; CHECK-NEXT: addi a3, a3, -4
 ; CHECK-NEXT: addi a1, a1, 16
 ; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bnez a2, .LBB66_1
+; CHECK-NEXT: bnez a3, .LBB66_1
 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=ilp32d < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f16.f16(
@@ -706,10 +703,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -731,9 +727,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f16.f16(
@@ -755,10 +750,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -780,9 +774,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfeq_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16.f16(
@@ -804,10 +797,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -829,9 +821,8 @@
 define <vscale x 16 x i1> @intrinsic_vmfeq_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16f16.f16(
@@ -853,10 +844,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -878,9 +868,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f32.f32(
@@ -902,10 +891,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -927,9 +915,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f32.f32(
@@ -951,10 +938,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -976,9 +962,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.f32(
@@ -1000,10 +985,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1025,9 +1009,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfeq_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f32.f32(
@@ -1049,10 +1032,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -1074,13 +1056,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f64.f64(
@@ -1101,16 +1078,11 @@
 define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f64.f64(
@@ -1158,16 +1125,11 @@
 define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f64.f64(
@@ -1215,16 +1172,11 @@
 define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=lp64d < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f16.f16(
@@ -706,10 +703,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -731,9 +727,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f16.f16(
@@ -755,10 +750,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -780,9 +774,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfeq_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16.f16(
@@ -804,10 +797,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -829,9 +821,8 @@
 define <vscale x 16 x i1> @intrinsic_vmfeq_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16f16.f16(
@@ -853,10 +844,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -878,9 +868,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f32.f32(
@@ -902,10 +891,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -927,9 +915,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f32.f32(
@@ -951,10 +938,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -976,9 +962,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32.f32(
@@ -1000,10 +985,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1025,9 +1009,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfeq_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f32.f32(
@@ -1049,10 +1032,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -1074,9 +1056,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f64.f64(
@@ -1098,10 +1079,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfeq.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -1123,9 +1103,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f64.f64(
@@ -1147,10 +1126,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfeq.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1172,9 +1150,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT: vmfeq.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f64.f64(
@@ -1196,10 +1173,9 @@
 ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfeq.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=ilp32d < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f16.f16(
@@ -706,10 +703,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -731,9 +727,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f16.f16(
@@ -755,10 +750,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -780,9 +774,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f16.f16(
@@ -804,10 +797,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -829,9 +821,8 @@
 define <vscale x 16 x i1> @intrinsic_vmfge_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16f16.f16(
@@ -853,10 +844,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -878,9 +868,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f32.f32(
@@ -902,10 +891,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -927,9 +915,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f32.f32(
@@ -951,10 +938,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -976,9 +962,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f32.f32(
@@ -1000,10 +985,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1025,9 +1009,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f32.f32(
@@ -1049,10 +1032,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -1074,13 +1056,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f64.f64(
@@ -1101,16 +1078,11 @@
 define <vscale x 1 x i1> @intrinsic_vmfge_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f64.f64(
@@ -1158,16 +1125,11 @@
 define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f64.f64(
@@ -1215,16 +1172,11 @@
 define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=lp64d < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f16.f16(
@@ -706,10 +703,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -731,9 +727,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f16.f16(
@@ -755,10 +750,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -780,9 +774,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f16.f16(
@@ -804,10 +797,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -829,9 +821,8 @@
 define <vscale x 16 x i1> @intrinsic_vmfge_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16f16.f16(
@@ -853,10 +844,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -878,9 +868,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f32.f32(
@@ -902,10 +891,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -927,9 +915,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f32.f32(
@@ -951,10 +938,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -976,9 +962,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f32.f32(
@@ -1000,10 +985,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1025,9 +1009,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f32.f32(
@@ -1049,10 +1032,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -1074,9 +1056,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f64.f64(
@@ -1098,10 +1079,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfge.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -1123,9 +1103,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f64.f64(
@@ -1147,10 +1126,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfge.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1172,9 +1150,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfge_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT: vmfge.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f64.f64(
@@ -1196,10 +1173,9 @@
 ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfge.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=ilp32d < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f16.f16(
@@ -706,10 +703,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -731,9 +727,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f16.f16(
@@ -755,10 +750,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -780,9 +774,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16.f16(
@@ -804,10 +797,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -829,9 +821,8 @@
 define <vscale x 16 x i1> @intrinsic_vmfgt_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16f16.f16(
@@ -853,10 +844,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -878,9 +868,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f32.f32(
@@ -902,10 +891,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -927,9 +915,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f32.f32(
@@ -951,10 +938,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -976,9 +962,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.f32(
@@ -1000,10 +985,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1025,9 +1009,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f32.f32(
@@ -1049,10 +1032,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -1074,13 +1056,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f64.f64(
@@ -1101,16 +1078,11 @@
 define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f64.f64(
@@ -1158,16 +1125,11 @@
 define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f64.f64(
@@ -1215,16 +1172,11 @@
 define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=lp64d < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f16.f16(
@@ -706,10 +703,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -731,9 +727,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f16.f16(
@@ -755,10 +750,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -780,9 +774,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16.f16(
@@ -804,10 +797,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -829,9 +821,8 @@
 define <vscale x 16 x i1> @intrinsic_vmfgt_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16f16.f16(
@@ -853,10 +844,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -878,9 +868,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f32.f32(
@@ -902,10 +891,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -927,9 +915,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f32.f32(
@@ -951,10 +938,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -976,9 +962,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32.f32(
@@ -1000,10 +985,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1025,9 +1009,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f32.f32(
@@ -1049,10 +1032,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -1074,9 +1056,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f64.f64(
@@ -1098,10 +1079,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfgt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -1123,9 +1103,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f64.f64(
@@ -1147,10 +1126,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfgt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1172,9 +1150,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT: vmfgt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmfgt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f64.f64(
@@ -1196,10 +1173,9 @@
 ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfgt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=ilp32d < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfle.vf v0,
v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfle.nxv2f16.f16( @@ -706,10 +703,9 @@ ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t +; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: @@ -731,9 +727,8 @@ define @intrinsic_vmfle_vf_nxv4f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vmfle.vf v0, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmfle.vf v0, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfle.nxv4f16.f16( @@ -755,10 +750,9 @@ ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t +; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret entry: @@ -780,9 +774,8 @@ define @intrinsic_vmfle_vf_nxv8f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu -; CHECK-NEXT: vmfle.vf v0, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vmfle.vf v0, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfle.nxv8f16.f16( @@ -804,10 +797,9 @@ ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfle.vf v11, v8, ft0, v0.t +; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: @@ -829,9 +821,8 @@ define @intrinsic_vmfle_vf_nxv16f16_f16( %0, half %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmfle_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: vmfle.vf v0, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmfle.vf v0, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfle.nxv16f16.f16( @@ -853,10 +844,9 @@ ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfle.vf v13, v8, ft0, v0.t +; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret entry: @@ -878,9 +868,8 @@ define @intrinsic_vmfle_vf_nxv1f32_f32( %0, float %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vmfle.vf v0, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmfle.vf v0, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfle.nxv1f32.f32( @@ -902,10 +891,9 @@ 
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -927,9 +915,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f32.f32(
@@ -951,10 +938,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -976,9 +962,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.f32(
@@ -1000,10 +985,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfle.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1025,9 +1009,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfle_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f32.f32(
@@ -1049,10 +1032,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfle.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -1074,13 +1056,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f64.f64(
@@ -1101,16 +1078,11 @@
 define <vscale x 1 x i1> @intrinsic_vmfle_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f64.f64(
@@ -1158,16 +1125,11 @@
 define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfle.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f64.f64(
@@ -1215,16 +1172,11 @@
 define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfle.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   i64);
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f16.f16(
@@ -706,10 +703,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -731,9 +727,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f16.f16(
@@ -755,10 +750,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -780,9 +774,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfle_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f16.f16(
@@ -804,10 +797,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfle.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -829,9 +821,8 @@
 define <vscale x 16 x i1> @intrinsic_vmfle_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16f16.f16(
@@ -853,10 +844,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfle.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -878,9 +868,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f32.f32(
@@ -902,10 +891,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -927,9 +915,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f32.f32(
@@ -951,10 +938,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -976,9 +962,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32.f32(
@@ -1000,10 +985,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfle.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1025,9 +1009,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfle_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f32.f32(
@@ -1049,10 +1032,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfle.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -1074,9 +1056,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f64.f64(
@@ -1098,10 +1079,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfle.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -1123,9 +1103,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f64.f64(
@@ -1147,10 +1126,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfle.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1172,9 +1150,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfle_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT: vmfle.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmfle.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f64.f64(
@@ -1196,10 +1173,9 @@
 ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfle.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   i32);
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f16.f16(
@@ -706,10 +703,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -731,9 +727,8 @@
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f16.f16(
@@ -755,10 +750,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -780,9 +774,8 @@
 define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f16.f16(
@@ -804,10 +797,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -829,9 +821,8 @@
 define <vscale x 16 x i1> @intrinsic_vmflt_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16f16.f16(
@@ -853,10 +844,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -878,9 +868,8 @@
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f32.f32(
@@ -902,10 +891,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -927,9 +915,8 @@
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f32.f32(
@@ -951,10 +938,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -976,9 +962,8 @@
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.f32(
@@ -1000,10 +985,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1025,9 +1009,8 @@
 define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f32.f32(
@@ -1049,10 +1032,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -1074,13 +1056,8 @@
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f64.f64(
@@ -1101,16 +1078,11 @@
 define <vscale x 1 x i1> @intrinsic_vmflt_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f64.f64(
@@ -1158,16 +1125,11 @@
 define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f64.f64(
@@ -1215,16 +1172,11 @@
 define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   i64);
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f16.f16(
@@ -706,10 +703,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -731,9 +727,8 @@
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f16.f16(
@@ -755,10 +750,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -780,9 +774,8 @@
 define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f16.f16(
@@ -804,10 +797,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -829,9 +821,8 @@
 define <vscale x 16 x i1> @intrinsic_vmflt_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16f16.f16(
@@ -853,10 +844,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -878,9 +868,8 @@
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f32.f32(
@@ -902,10 +891,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -927,9 +915,8 @@
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f32.f32(
@@ -951,10 +938,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -976,9 +962,8 @@
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32.f32(
@@ -1000,10 +985,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1025,9 +1009,8 @@
 define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f32.f32(
@@ -1049,10 +1032,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -1074,9 +1056,8 @@
 define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f64.f64(
@@ -1098,10 +1079,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmflt.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -1123,9 +1103,8 @@
 define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f64.f64(
@@ -1147,10 +1126,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmflt.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1172,9 +1150,8 @@
 define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmflt_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT: vmflt.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmflt.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f64.f64(
@@ -1196,10 +1173,9 @@
 ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.d.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmflt.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   i32);
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f16.f16(
@@ -706,10 +703,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -731,9 +727,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f16.f16(
@@ -755,10 +750,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -780,9 +774,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfne_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f16.f16(
@@ -804,10 +797,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfne.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -829,9 +821,8 @@
 define <vscale x 16 x i1> @intrinsic_vmfne_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16f16.f16(
@@ -853,10 +844,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfne.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -878,9 +868,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f32.f32(
@@ -902,10 +891,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -927,9 +915,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f32.f32(
@@ -951,10 +938,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -976,9 +962,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32.f32(
@@ -1000,10 +985,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfne.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -1025,9 +1009,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfne_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f32.f32(
@@ -1049,10 +1032,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f32_f32:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfne.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
 ; CHECK-NEXT: ret
 entry:
@@ -1074,13 +1056,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f64.f64(
@@ -1101,16 +1078,11 @@
 define <vscale x 1 x i1> @intrinsic_vmfne_mask_vf_nxv1f64_f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1f64.f64(
@@ -1131,13 +1103,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f64.f64(
@@ -1158,16 +1125,11 @@
 define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfne.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64.f64(
@@ -1188,13 +1150,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f64.f64(
@@ -1215,16 +1172,11 @@
 define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f64_f64:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw a0, 8(sp)
-; CHECK-NEXT: sw a1, 12(sp)
-; CHECK-NEXT: fld ft0, 8(sp)
 ; CHECK-NEXT: vmv1r.v v13, v0
-; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v12
-; CHECK-NEXT: vmfne.vf v13, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v13
-; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN: < %s | FileCheck %s
+; RUN: -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   i64);
@@ -633,9 +633,8 @@
 define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16.f16(
@@ -657,10 +656,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -682,9 +680,8 @@
 define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f16.f16(
@@ -706,10 +703,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -731,9 +727,8 @@
 define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f16.f16(
@@ -755,10 +750,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t
 ; CHECK-NEXT: vmv.v.v v0, v10
 ; CHECK-NEXT: ret
 entry:
@@ -780,9 +774,8 @@
 define <vscale x 8 x i1> @intrinsic_vmfne_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f16.f16(
@@ -804,10 +797,9 @@
 ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmfne.vf v11, v8, ft0, v0.t
+; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v11
 ; CHECK-NEXT: ret
 entry:
@@ -829,9 +821,8 @@
 define <vscale x 16 x i1> @intrinsic_vmfne_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmfne_vf_nxv16f16_f16:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: fmv.h.x ft0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT: vmfne.vf v0, v8, ft0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
 ; CHECK-NEXT: ret
 entry:
 %a = call
@llvm.riscv.vmfne.nxv16f16.f16( @@ -853,10 +844,9 @@ ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: fmv.h.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfne.vf v13, v8, ft0, v0.t +; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret entry: @@ -878,9 +868,8 @@ define @intrinsic_vmfne_vf_nxv1f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu -; CHECK-NEXT: vmfne.vf v0, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfne.nxv1f32.f32( @@ -902,10 +891,9 @@ ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t +; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: @@ -927,9 +915,8 @@ define @intrinsic_vmfne_vf_nxv2f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vmfne.vf v0, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfne.nxv2f32.f32( @@ -951,10 +938,9 @@ ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t +; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret entry: @@ -976,9 +962,8 @@ define @intrinsic_vmfne_vf_nxv4f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu -; CHECK-NEXT: vmfne.vf v0, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfne.nxv4f32.f32( @@ -1000,10 +985,9 @@ ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfne.vf v11, v8, ft0, v0.t +; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: @@ -1025,9 +1009,8 @@ define @intrinsic_vmfne_vf_nxv8f32_f32( %0, float %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmfne_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu -; CHECK-NEXT: vmfne.vf v0, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfne.nxv8f32.f32( @@ -1049,10 +1032,9 @@ ; CHECK-LABEL: 
intrinsic_vmfne_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfne.vf v13, v8, ft0, v0.t +; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret entry: @@ -1074,9 +1056,8 @@ define @intrinsic_vmfne_vf_nxv1f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmfne_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vf v0, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfne.nxv1f64.f64( @@ -1098,10 +1079,9 @@ ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmfne.vf v10, v8, ft0, v0.t +; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret entry: @@ -1123,9 +1103,8 @@ define @intrinsic_vmfne_vf_nxv2f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmfne_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vf v0, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfne.nxv2f64.f64( @@ -1147,10 +1126,9 @@ ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfne.vf v11, v8, ft0, v0.t +; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: @@ -1172,9 +1150,8 @@ define @intrinsic_vmfne_vf_nxv4f64_f64( %0, double %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmfne_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vf v0, v8, ft0 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmfne.nxv4f64.f64( @@ -1196,10 +1173,9 @@ ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: fmv.d.x ft0, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfne.vf v13, v8, ft0, v0.t +; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \ -; RUN: -verify-machineinstrs -O2 < %s | FileCheck %s +; RUN: -target-abi=lp64d -verify-machineinstrs -O2 < %s | FileCheck %s ; The following tests check whether 
inserting VSETVLI avoids inserting ; unneeded vsetvlis across basic blocks. @@ -445,24 +445,22 @@ define void @saxpy_vec(i64 %n, float %a, float* nocapture readonly %x, float* nocapture %y) { ; CHECK-LABEL: saxpy_vec: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a4, a0, e32, m8, ta, mu -; CHECK-NEXT: beqz a4, .LBB8_3 -; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: fmv.w.x ft0, a1 -; CHECK-NEXT: .LBB8_2: # %for.body +; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, mu +; CHECK-NEXT: beqz a3, .LBB8_2 +; CHECK-NEXT: .LBB8_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vle32.v v8, (a2) -; CHECK-NEXT: vle32.v v16, (a3) -; CHECK-NEXT: slli a1, a4, 2 -; CHECK-NEXT: add a2, a2, a1 -; CHECK-NEXT: vsetvli zero, a4, e32, m8, tu, mu -; CHECK-NEXT: vfmacc.vf v16, ft0, v8 -; CHECK-NEXT: vse32.v v16, (a3) -; CHECK-NEXT: sub a0, a0, a4 -; CHECK-NEXT: vsetvli a4, a0, e32, m8, ta, mu -; CHECK-NEXT: add a3, a3, a1 -; CHECK-NEXT: bnez a4, .LBB8_2 -; CHECK-NEXT: .LBB8_3: # %for.end +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vle32.v v16, (a2) +; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: add a1, a1, a4 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, tu, mu +; CHECK-NEXT: vfmacc.vf v16, fa0, v8 +; CHECK-NEXT: vse32.v v16, (a2) +; CHECK-NEXT: sub a0, a0, a3 +; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, mu +; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: bnez a3, .LBB8_1 +; CHECK-NEXT: .LBB8_2: # %for.end ; CHECK-NEXT: ret entry: %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 3) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \ -; RUN: -verify-machineinstrs -O2 < %s | FileCheck %s +; RUN: -target-abi=lp64d -verify-machineinstrs -O2 < %s | FileCheck %s declare i64 @llvm.riscv.vsetvli(i64, i64, i64) declare i64 @llvm.riscv.vsetvlimax(i64, i64) @@ -196,9 +196,8 @@ define @test10( %a, double %b) nounwind { ; CHECK-LABEL: test10: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 ; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, mu -; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: %x = tail call i64 @llvm.riscv.vsetvlimax(i64 3, i64 0) @@ -210,9 +209,8 @@ define @test11( %a, double %b) nounwind { ; CHECK-LABEL: test11: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 ; CHECK-NEXT: vsetivli a0, 6, e64, m1, tu, mu -; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: %x = tail call i64 @llvm.riscv.vsetvli(i64 6, i64 3, i64 0) @@ -224,10 +222,9 @@ define @test12( %a, double %b, %mask) nounwind { ; CHECK-LABEL: test12: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fmv.d.x ft0, a0 ; CHECK-NEXT: vsetivli zero, 9, e64, m1, tu, mu ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t -; CHECK-NEXT: vfmv.s.f v8, ft0 +; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: ret entry: %x = call @llvm.riscv.vfadd.mask.nxv1f64.f64(