diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -261,6 +261,19 @@ bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; + bool softPromoteHalfType() const override { return true; } + + /// Return the register type used to pass VT: f32 for f16 when F is enabled + /// and Zfh is not, otherwise the TargetLowering default. + MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, + EVT VT) const override; + + /// Return the number of registers used to pass VT: one for f16 when F is + /// enabled and Zfh is not, otherwise the TargetLowering default. + unsigned getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const override; + /// Return true if the given shuffle mask can be codegen'd directly, or if it /// should be stack expanded. bool isShuffleMaskLegal(ArrayRef M, EVT VT) const override; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -858,6 +858,28 @@ (VT == MVT::f64 && Subtarget.hasStdExtD()); } +MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const { + // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still + // end up using a GPR but that will be decided based on ABI. + if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh()) + return MVT::f32; + + return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); +} + +unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + CallingConv::ID CC, + EVT VT) const { + // f16 is passed as a single f32 when F is enabled and Zfh is not, so it + // needs one register. It may still end up in a GPR; the ABI decides that.
+ if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh()) + return 1; + + return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); +} + // Changes the condition code and swaps operands if necessary, so the SetCC // operation matches one of the comparisons supported directly by branches // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll @@ -191,8 +191,8 @@ ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a7 ; RV32I-NEXT: lhu a0, 16(sp) +; RV32I-NEXT: mv s0, a7 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: call __fixsfsi@plt ; RV32I-NEXT: add a0, s0, a0 @@ -206,8 +206,8 @@ ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a7 ; RV64I-NEXT: lhu a0, 16(sp) +; RV64I-NEXT: mv s0, a7 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: call __fixsfdi@plt ; RV64I-NEXT: addw a0, s0, a0 @@ -221,8 +221,8 @@ ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: mv s0, a7 ; RV32IF-NEXT: lhu a0, 16(sp) +; RV32IF-NEXT: mv s0, a7 ; RV32IF-NEXT: call __gnu_h2f_ieee@plt ; RV32IF-NEXT: fmv.w.x ft0, a0 ; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz @@ -237,8 +237,8 @@ ; RV64IF-NEXT: addi sp, sp, -16 ; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IF-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64IF-NEXT: mv s0, a7 ; RV64IF-NEXT: lhu a0, 16(sp) +; RV64IF-NEXT: mv s0, a7 ; RV64IF-NEXT: call __gnu_h2f_ieee@plt ; RV64IF-NEXT: fmv.w.x ft0, a0 ; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz diff --git
a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll --- a/llvm/test/CodeGen/RISCV/copysign-casts.ll +++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll @@ -340,98 +340,73 @@ define half @fold_demote_h_s(half %a, float %b) nounwind { ; RV32I-LABEL: fold_demote_h_s: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: and a2, s0, a1 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: lui a2, 8 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: srli a1, a1, 16 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fold_demote_h_s: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw a1, a1, -1 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: lui a1, 524288 -; RV64I-NEXT: and a2, s0, a1 -; RV64I-NEXT: addiw a1, a1, -1 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: lui a2, 8 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: slli a2, a2, 31 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: 
srli a1, a1, 16 +; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32IF-LABEL: fold_demote_h_s: ; RV32IF: # %bb.0: -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: fmv.s fs0, fa1 -; RV32IF-NEXT: fmv.x.w a0, fa0 -; RV32IF-NEXT: call __gnu_h2f_ieee@plt -; RV32IF-NEXT: fsgnj.s fa0, fa0, fs0 -; RV32IF-NEXT: call __gnu_f2h_ieee@plt +; RV32IF-NEXT: fmv.x.w a0, fa1 +; RV32IF-NEXT: fmv.x.w a1, fa0 +; RV32IF-NEXT: lui a2, 8 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a1, a1, a2 +; RV32IF-NEXT: lui a2, 524288 +; RV32IF-NEXT: and a0, a0, a2 +; RV32IF-NEXT: srli a0, a0, 16 +; RV32IF-NEXT: or a0, a1, a0 ; RV32IF-NEXT: lui a1, 1048560 ; RV32IF-NEXT: or a0, a0, a1 ; RV32IF-NEXT: fmv.w.x fa0, a0 -; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; ; RV32IFD-LABEL: fold_demote_h_s: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill -; RV32IFD-NEXT: fmv.s fs0, fa1 -; RV32IFD-NEXT: fmv.x.w a0, fa0 -; RV32IFD-NEXT: call __gnu_h2f_ieee@plt -; RV32IFD-NEXT: fsgnj.s fa0, fa0, fs0 -; RV32IFD-NEXT: call __gnu_f2h_ieee@plt +; RV32IFD-NEXT: fmv.x.w a0, fa1 +; RV32IFD-NEXT: fmv.x.w a1, fa0 +; RV32IFD-NEXT: lui a2, 8 +; RV32IFD-NEXT: addi a2, a2, -1 +; RV32IFD-NEXT: and a1, a1, a2 +; RV32IFD-NEXT: lui a2, 524288 +; RV32IFD-NEXT: and a0, a0, a2 +; RV32IFD-NEXT: srli a0, a0, 16 +; RV32IFD-NEXT: or a0, a1, a0 ; RV32IFD-NEXT: lui a1, 1048560 ; RV32IFD-NEXT: or a0, a0, a1 ; RV32IFD-NEXT: fmv.w.x fa0, a0 -; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fold_demote_h_s: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: addi sp, sp, -16 -; RV64IFD-NEXT: sd 
ra, 8(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: fmv.s fs0, fa1 -; RV64IFD-NEXT: fmv.x.w a0, fa0 -; RV64IFD-NEXT: call __gnu_h2f_ieee@plt -; RV64IFD-NEXT: fsgnj.s fa0, fa0, fs0 -; RV64IFD-NEXT: call __gnu_f2h_ieee@plt +; RV64IFD-NEXT: fmv.x.w a0, fa1 +; RV64IFD-NEXT: fmv.x.w a1, fa0 +; RV64IFD-NEXT: lui a2, 8 +; RV64IFD-NEXT: addiw a2, a2, -1 +; RV64IFD-NEXT: and a1, a1, a2 +; RV64IFD-NEXT: lui a2, 524288 +; RV64IFD-NEXT: and a0, a0, a2 +; RV64IFD-NEXT: srli a0, a0, 16 +; RV64IFD-NEXT: or a0, a1, a0 ; RV64IFD-NEXT: lui a1, 1048560 ; RV64IFD-NEXT: or a0, a0, a1 ; RV64IFD-NEXT: fmv.w.x fa0, a0 -; RV64IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret ; ; RV32IFZFH-LABEL: fold_demote_h_s: @@ -459,104 +434,76 @@ define half @fold_demote_h_d(half %a, double %b) nounwind { ; RV32I-LABEL: fold_demote_h_d: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: lui a1, 8 ; RV32I-NEXT: addi a1, a1, -1 ; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: lui a1, 524288 -; RV32I-NEXT: and a2, s0, a1 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: srli a1, a1, 16 +; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fold_demote_h_d: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw a1, a1, -1 -; 
RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: lui a1, 524288 -; RV64I-NEXT: addiw a1, a1, -1 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: addi a1, zero, -1 -; RV64I-NEXT: slli a1, a1, 63 -; RV64I-NEXT: and a1, s0, a1 -; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: lui a2, 8 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: addi a2, zero, -1 +; RV64I-NEXT: slli a2, a2, 63 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a1, a1, 48 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32IF-LABEL: fold_demote_h_d: ; RV32IF: # %bb.0: -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: mv s0, a1 ; RV32IF-NEXT: fmv.x.w a0, fa0 -; RV32IF-NEXT: call __gnu_h2f_ieee@plt -; RV32IF-NEXT: fmv.w.x ft0, s0 -; RV32IF-NEXT: fsgnj.s fa0, fa0, ft0 -; RV32IF-NEXT: call __gnu_f2h_ieee@plt +; RV32IF-NEXT: lui a2, 8 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a0, a2 +; RV32IF-NEXT: lui a2, 524288 +; RV32IF-NEXT: and a1, a1, a2 +; RV32IF-NEXT: srli a1, a1, 16 +; RV32IF-NEXT: or a0, a0, a1 ; RV32IF-NEXT: lui a1, 1048560 ; RV32IF-NEXT: or a0, a0, a1 ; RV32IF-NEXT: fmv.w.x fa0, a0 -; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; ; RV32IFD-LABEL: fold_demote_h_d: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill -; RV32IFD-NEXT: fmv.d fs0, fa1 +; RV32IFD-NEXT: fsd fa1, 8(sp) ; RV32IFD-NEXT: fmv.x.w a0, fa0 -; RV32IFD-NEXT: call __gnu_h2f_ieee@plt -; RV32IFD-NEXT: fcvt.s.d ft0, fs0 -; RV32IFD-NEXT: fsgnj.s fa0, fa0, ft0 -; RV32IFD-NEXT: call 
__gnu_f2h_ieee@plt +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: lui a2, 8 +; RV32IFD-NEXT: addi a2, a2, -1 +; RV32IFD-NEXT: and a0, a0, a2 +; RV32IFD-NEXT: lui a2, 524288 +; RV32IFD-NEXT: and a1, a1, a2 +; RV32IFD-NEXT: srli a1, a1, 16 +; RV32IFD-NEXT: or a0, a0, a1 ; RV32IFD-NEXT: lui a1, 1048560 ; RV32IFD-NEXT: or a0, a0, a1 ; RV32IFD-NEXT: fmv.w.x fa0, a0 -; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fold_demote_h_d: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: addi sp, sp, -16 -; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill -; RV64IFD-NEXT: fmv.d fs0, fa1 -; RV64IFD-NEXT: fmv.x.w a0, fa0 -; RV64IFD-NEXT: call __gnu_h2f_ieee@plt -; RV64IFD-NEXT: fcvt.s.d ft0, fs0 -; RV64IFD-NEXT: fsgnj.s fa0, fa0, ft0 -; RV64IFD-NEXT: call __gnu_f2h_ieee@plt +; RV64IFD-NEXT: fmv.x.d a0, fa1 +; RV64IFD-NEXT: fmv.x.w a1, fa0 +; RV64IFD-NEXT: lui a2, 8 +; RV64IFD-NEXT: addiw a2, a2, -1 +; RV64IFD-NEXT: and a1, a1, a2 +; RV64IFD-NEXT: addi a2, zero, -1 +; RV64IFD-NEXT: slli a2, a2, 63 +; RV64IFD-NEXT: and a0, a0, a2 +; RV64IFD-NEXT: srli a0, a0, 48 +; RV64IFD-NEXT: or a0, a1, a0 ; RV64IFD-NEXT: lui a1, 1048560 ; RV64IFD-NEXT: or a0, a0, a1 ; RV64IFD-NEXT: fmv.w.x fa0, a0 -; RV64IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload -; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret ; ; RV32IFZFH-LABEL: fold_demote_h_d: diff --git a/llvm/test/CodeGen/RISCV/fp16-promote.ll b/llvm/test/CodeGen/RISCV/fp16-promote.ll --- a/llvm/test/CodeGen/RISCV/fp16-promote.ll +++ b/llvm/test/CodeGen/RISCV/fp16-promote.ll @@ -87,16 +87,16 @@ ; CHECK-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; CHECK-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: mv s0, a1 -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: lhu a0, 0(a0) +; 
CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: lhu s1, 0(a0) +; CHECK-NEXT: lhu a0, 0(a1) ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.s fs0, fa0 -; CHECK-NEXT: lhu a0, 0(s0) +; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: fadd.s fa0, fs0, fa0 +; CHECK-NEXT: fadd.s fa0, fa0, fs0 ; CHECK-NEXT: call __gnu_f2h_ieee@plt -; CHECK-NEXT: sh a0, 0(s1) +; CHECK-NEXT: sh a0, 0(s0) ; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; CHECK-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -118,16 +118,16 @@ ; CHECK-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; CHECK-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: mv s0, a1 -; CHECK-NEXT: mv s1, a0 -; CHECK-NEXT: lhu a0, 0(a0) +; CHECK-NEXT: mv s0, a0 +; CHECK-NEXT: lhu s1, 0(a0) +; CHECK-NEXT: lhu a0, 0(a1) ; CHECK-NEXT: call __gnu_h2f_ieee@plt ; CHECK-NEXT: fmv.s fs0, fa0 -; CHECK-NEXT: lhu a0, 0(s0) +; CHECK-NEXT: mv a0, s1 ; CHECK-NEXT: call __gnu_h2f_ieee@plt -; CHECK-NEXT: fmul.s fa0, fs0, fa0 +; CHECK-NEXT: fmul.s fa0, fa0, fs0 ; CHECK-NEXT: call __gnu_f2h_ieee@plt -; CHECK-NEXT: sh a0, 0(s1) +; CHECK-NEXT: sh a0, 0(s0) ; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload ; CHECK-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; CHECK-NEXT: lw s0, 24(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll --- a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll +++ b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll @@ -81,26 +81,13 @@ define half @fcopysign_fneg(half %a, half %b) nounwind { ; RV32I-LABEL: fcopysign_fneg: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a1 -; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: and a0, a0, a1 -; RV32I-NEXT: 
call __gnu_h2f_ieee@plt -; RV32I-NEXT: not a1, s0 -; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: not a1, a1 +; RV32I-NEXT: lui a2, 1048568 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: lui a2, 8 ; RV32I-NEXT: addi a2, a2, -1 ; RV32I-NEXT: and a0, a0, a2 -; RV32I-NEXT: lui a2, 8 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: call __gnu_f2h_ieee@plt -; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV32IZFH-LABEL: fcopysign_fneg: @@ -110,26 +97,13 @@ ; ; RV64I-LABEL: fcopysign_fneg: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a1 -; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw a1, a1, -1 -; RV64I-NEXT: and a0, a0, a1 -; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: not a1, s0 -; RV64I-NEXT: lui a2, 524288 -; RV64I-NEXT: addiw a2, a2, -1 -; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: not a1, a1 ; RV64I-NEXT: lui a2, 1048568 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: lui a2, 8 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: call __gnu_f2h_ieee@plt -; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV64IZFH-LABEL: fcopysign_fneg: diff --git a/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll --- a/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll +++ b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll @@ -17,18 +17,21 @@ ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: lui a1, 16 -; RV32I-NEXT: addi s0, a1, -1 
-; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: addi s1, a1, -1 +; RV32I-NEXT: and a0, a0, s1 ; RV32I-NEXT: call __gnu_h2f_ieee@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: and a0, s0, s1 ; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __addsf3@plt +; RV32I-NEXT: call __gnu_f2h_ieee@plt +; RV32I-NEXT: and a0, a0, s1 +; RV32I-NEXT: call __gnu_h2f_ieee@plt ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __divsf3@plt ; RV32I-NEXT: call __gnu_f2h_ieee@plt @@ -46,18 +49,21 @@ ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: lui a1, 16 -; RV64I-NEXT: addiw s0, a1, -1 -; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: addiw s1, a1, -1 +; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: call __gnu_h2f_ieee@plt -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: and a0, s0, s1 ; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __addsf3@plt +; RV64I-NEXT: call __gnu_f2h_ieee@plt +; RV64I-NEXT: and a0, a0, s1 +; RV64I-NEXT: call __gnu_h2f_ieee@plt ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __divsf3@plt ; RV64I-NEXT: call __gnu_f2h_ieee@plt