diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -7283,7 +7283,20 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts( SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const { + bool IsABIRegCopy = CC.hasValue(); EVT ValueVT = Val.getValueType(); + if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) { + // Cast the f16 to i16, extend to i32, pad with ones to make a float nan, + // and cast to f32. + Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val); + Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val); + Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val, + DAG.getConstant(0xFFFF0000, DL, MVT::i32)); + Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); + Parts[0] = Val; + return true; + } + if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { LLVMContext &Context = *DAG.getContext(); EVT ValueEltVT = ValueVT.getVectorElementType(); @@ -7312,6 +7325,17 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const { + bool IsABIRegCopy = CC.hasValue(); + if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) { + SDValue Val = Parts[0]; + + // Cast the f32 to i32, truncate to i16, and cast back to f16. 
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val); + Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val); + Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val); + return Val; + } + if (ValueVT.isScalableVector() && PartVT.isScalableVector()) { LLVMContext &Context = *DAG.getContext(); SDValue Val = Parts[0]; diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll @@ -0,0 +1,528 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32IF +; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64IF +; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi=ilp32f -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32-ILP32F +; RUN: llc -mtriple=riscv64 -mattr=+f -target-abi=lp64f -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64-LP64F + +; Tests passing half arguments and returns without Zfh. +; Covers with and without F extension and ilp32f/ilp64f +; calling conventions. 
+ +define i32 @callee_half_in_regs(i32 %a, half %b) nounwind { +; RV32I-LABEL: callee_half_in_regs: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lui a0, 16 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: call __fixsfsi@plt +; RV32I-NEXT: add a0, s0, a0 +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: callee_half_in_regs: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lui a0, 16 +; RV64I-NEXT: addiw a0, a0, -1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: call __fixsfdi@plt +; RV64I-NEXT: addw a0, s0, a0 +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: callee_half_in_regs: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: mv s0, a0 +; RV32IF-NEXT: mv a0, a1 +; RV32IF-NEXT: call __gnu_h2f_ieee@plt +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IF-NEXT: add a0, s0, a0 +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: callee_half_in_regs: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64IF-NEXT: mv s0, a0 +; RV64IF-NEXT: mv a0, a1 +; RV64IF-NEXT: call 
__gnu_h2f_ieee@plt +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: addw a0, s0, a0 +; RV64IF-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32-ILP32F-LABEL: callee_half_in_regs: +; RV32-ILP32F: # %bb.0: +; RV32-ILP32F-NEXT: addi sp, sp, -16 +; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-ILP32F-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32-ILP32F-NEXT: mv s0, a0 +; RV32-ILP32F-NEXT: fmv.x.w a0, fa0 +; RV32-ILP32F-NEXT: call __gnu_h2f_ieee@plt +; RV32-ILP32F-NEXT: fcvt.w.s a0, fa0, rtz +; RV32-ILP32F-NEXT: add a0, s0, a0 +; RV32-ILP32F-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-ILP32F-NEXT: addi sp, sp, 16 +; RV32-ILP32F-NEXT: ret +; +; RV64-LP64F-LABEL: callee_half_in_regs: +; RV64-LP64F: # %bb.0: +; RV64-LP64F-NEXT: addi sp, sp, -16 +; RV64-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-LP64F-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64-LP64F-NEXT: mv s0, a0 +; RV64-LP64F-NEXT: fmv.x.w a0, fa0 +; RV64-LP64F-NEXT: call __gnu_h2f_ieee@plt +; RV64-LP64F-NEXT: fcvt.l.s a0, fa0, rtz +; RV64-LP64F-NEXT: addw a0, s0, a0 +; RV64-LP64F-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-LP64F-NEXT: addi sp, sp, 16 +; RV64-LP64F-NEXT: ret + %b_fptosi = fptosi half %b to i32 + %1 = add i32 %a, %b_fptosi + ret i32 %1 +} + +define i32 @caller_half_in_regs() nounwind { +; RV32I-LABEL: caller_half_in_regs: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: addi a0, zero, 1 +; RV32I-NEXT: lui a1, 4 +; RV32I-NEXT: call callee_half_in_regs@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_half_in_regs: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, 
sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi a0, zero, 1 +; RV64I-NEXT: lui a1, 4 +; RV64I-NEXT: call callee_half_in_regs@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: caller_half_in_regs: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: addi a0, zero, 1 +; RV32IF-NEXT: lui a1, 1048564 +; RV32IF-NEXT: call callee_half_in_regs@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: caller_half_in_regs: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: lui a0, %hi(.LCPI1_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI1_0)(a0) +; RV64IF-NEXT: fmv.x.w a1, ft0 +; RV64IF-NEXT: addi a0, zero, 1 +; RV64IF-NEXT: call callee_half_in_regs@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32-ILP32F-LABEL: caller_half_in_regs: +; RV32-ILP32F: # %bb.0: +; RV32-ILP32F-NEXT: addi sp, sp, -16 +; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-ILP32F-NEXT: lui a0, %hi(.LCPI1_0) +; RV32-ILP32F-NEXT: flw fa0, %lo(.LCPI1_0)(a0) +; RV32-ILP32F-NEXT: addi a0, zero, 1 +; RV32-ILP32F-NEXT: call callee_half_in_regs@plt +; RV32-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-ILP32F-NEXT: addi sp, sp, 16 +; RV32-ILP32F-NEXT: ret +; +; RV64-LP64F-LABEL: caller_half_in_regs: +; RV64-LP64F: # %bb.0: +; RV64-LP64F-NEXT: addi sp, sp, -16 +; RV64-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-LP64F-NEXT: lui a0, %hi(.LCPI1_0) +; RV64-LP64F-NEXT: flw fa0, %lo(.LCPI1_0)(a0) +; RV64-LP64F-NEXT: addi a0, zero, 1 +; RV64-LP64F-NEXT: call callee_half_in_regs@plt +; RV64-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-LP64F-NEXT: addi sp, sp, 16 +; RV64-LP64F-NEXT: ret + %1 = call i32 
@callee_half_in_regs(i32 1, half 2.0) + ret i32 %1 +} + +define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, half %i) nounwind { +; RV32I-LABEL: callee_half_on_stack: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a7 +; RV32I-NEXT: lhu a0, 16(sp) +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: call __fixsfsi@plt +; RV32I-NEXT: add a0, s0, a0 +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: callee_half_on_stack: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a7 +; RV64I-NEXT: lhu a0, 16(sp) +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: call __fixsfdi@plt +; RV64I-NEXT: addw a0, s0, a0 +; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: callee_half_on_stack: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: mv s0, a7 +; RV32IF-NEXT: lhu a0, 16(sp) +; RV32IF-NEXT: call __gnu_h2f_ieee@plt +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IF-NEXT: add a0, s0, a0 +; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: callee_half_on_stack: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64IF-NEXT: mv s0, a7 +; RV64IF-NEXT: lhu a0, 16(sp) +; 
RV64IF-NEXT: call __gnu_h2f_ieee@plt +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: addw a0, s0, a0 +; RV64IF-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32-ILP32F-LABEL: callee_half_on_stack: +; RV32-ILP32F: # %bb.0: +; RV32-ILP32F-NEXT: addi sp, sp, -16 +; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-ILP32F-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32-ILP32F-NEXT: mv s0, a7 +; RV32-ILP32F-NEXT: fmv.x.w a0, fa0 +; RV32-ILP32F-NEXT: call __gnu_h2f_ieee@plt +; RV32-ILP32F-NEXT: fcvt.w.s a0, fa0, rtz +; RV32-ILP32F-NEXT: add a0, s0, a0 +; RV32-ILP32F-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-ILP32F-NEXT: addi sp, sp, 16 +; RV32-ILP32F-NEXT: ret +; +; RV64-LP64F-LABEL: callee_half_on_stack: +; RV64-LP64F: # %bb.0: +; RV64-LP64F-NEXT: addi sp, sp, -16 +; RV64-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-LP64F-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64-LP64F-NEXT: mv s0, a7 +; RV64-LP64F-NEXT: fmv.x.w a0, fa0 +; RV64-LP64F-NEXT: call __gnu_h2f_ieee@plt +; RV64-LP64F-NEXT: fcvt.l.s a0, fa0, rtz +; RV64-LP64F-NEXT: addw a0, s0, a0 +; RV64-LP64F-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-LP64F-NEXT: addi sp, sp, 16 +; RV64-LP64F-NEXT: ret + %1 = fptosi half %i to i32 + %2 = add i32 %h, %1 + ret i32 %2 +} + +define i32 @caller_half_on_stack() nounwind { +; RV32I-LABEL: caller_half_on_stack: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lui a0, 5 +; RV32I-NEXT: addi t0, a0, -1792 +; RV32I-NEXT: addi a0, zero, 1 +; RV32I-NEXT: addi a1, zero, 2 +; RV32I-NEXT: addi a2, zero, 3 +; RV32I-NEXT: addi a3, zero, 4 +; RV32I-NEXT: addi a4, zero, 5 +; RV32I-NEXT: addi a5, zero, 6 +; RV32I-NEXT: addi a6, 
zero, 7 +; RV32I-NEXT: addi a7, zero, 8 +; RV32I-NEXT: sw t0, 0(sp) +; RV32I-NEXT: call callee_half_on_stack@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_half_on_stack: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lui a0, 5 +; RV64I-NEXT: addiw t0, a0, -1792 +; RV64I-NEXT: addi a0, zero, 1 +; RV64I-NEXT: addi a1, zero, 2 +; RV64I-NEXT: addi a2, zero, 3 +; RV64I-NEXT: addi a3, zero, 4 +; RV64I-NEXT: addi a4, zero, 5 +; RV64I-NEXT: addi a5, zero, 6 +; RV64I-NEXT: addi a6, zero, 7 +; RV64I-NEXT: addi a7, zero, 8 +; RV64I-NEXT: sd t0, 0(sp) +; RV64I-NEXT: call callee_half_on_stack@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: caller_half_on_stack: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: lui a0, 1048565 +; RV32IF-NEXT: addi t0, a0, -1792 +; RV32IF-NEXT: addi a0, zero, 1 +; RV32IF-NEXT: addi a1, zero, 2 +; RV32IF-NEXT: addi a2, zero, 3 +; RV32IF-NEXT: addi a3, zero, 4 +; RV32IF-NEXT: addi a4, zero, 5 +; RV32IF-NEXT: addi a5, zero, 6 +; RV32IF-NEXT: addi a6, zero, 7 +; RV32IF-NEXT: addi a7, zero, 8 +; RV32IF-NEXT: sw t0, 0(sp) +; RV32IF-NEXT: call callee_half_on_stack@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: caller_half_on_stack: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: lui a0, 256 +; RV64IF-NEXT: addiw a0, a0, -11 +; RV64IF-NEXT: slli a0, a0, 12 +; RV64IF-NEXT: addi t0, a0, -1792 +; RV64IF-NEXT: addi a0, zero, 1 +; RV64IF-NEXT: addi a1, zero, 2 +; RV64IF-NEXT: addi a2, zero, 3 +; RV64IF-NEXT: addi a3, zero, 4 +; RV64IF-NEXT: addi a4, zero, 5 +; RV64IF-NEXT: addi a5, zero, 6 +; RV64IF-NEXT: addi 
a6, zero, 7 +; RV64IF-NEXT: addi a7, zero, 8 +; RV64IF-NEXT: sw t0, 0(sp) +; RV64IF-NEXT: call callee_half_on_stack@plt +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32-ILP32F-LABEL: caller_half_on_stack: +; RV32-ILP32F: # %bb.0: +; RV32-ILP32F-NEXT: addi sp, sp, -16 +; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-ILP32F-NEXT: lui a0, %hi(.LCPI3_0) +; RV32-ILP32F-NEXT: flw fa0, %lo(.LCPI3_0)(a0) +; RV32-ILP32F-NEXT: addi a0, zero, 1 +; RV32-ILP32F-NEXT: addi a1, zero, 2 +; RV32-ILP32F-NEXT: addi a2, zero, 3 +; RV32-ILP32F-NEXT: addi a3, zero, 4 +; RV32-ILP32F-NEXT: addi a4, zero, 5 +; RV32-ILP32F-NEXT: addi a5, zero, 6 +; RV32-ILP32F-NEXT: addi a6, zero, 7 +; RV32-ILP32F-NEXT: addi a7, zero, 8 +; RV32-ILP32F-NEXT: call callee_half_on_stack@plt +; RV32-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-ILP32F-NEXT: addi sp, sp, 16 +; RV32-ILP32F-NEXT: ret +; +; RV64-LP64F-LABEL: caller_half_on_stack: +; RV64-LP64F: # %bb.0: +; RV64-LP64F-NEXT: addi sp, sp, -16 +; RV64-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-LP64F-NEXT: lui a0, %hi(.LCPI3_0) +; RV64-LP64F-NEXT: flw fa0, %lo(.LCPI3_0)(a0) +; RV64-LP64F-NEXT: addi a0, zero, 1 +; RV64-LP64F-NEXT: addi a1, zero, 2 +; RV64-LP64F-NEXT: addi a2, zero, 3 +; RV64-LP64F-NEXT: addi a3, zero, 4 +; RV64-LP64F-NEXT: addi a4, zero, 5 +; RV64-LP64F-NEXT: addi a5, zero, 6 +; RV64-LP64F-NEXT: addi a6, zero, 7 +; RV64-LP64F-NEXT: addi a7, zero, 8 +; RV64-LP64F-NEXT: call callee_half_on_stack@plt +; RV64-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-LP64F-NEXT: addi sp, sp, 16 +; RV64-LP64F-NEXT: ret + %1 = call i32 @callee_half_on_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, half 10.0) + ret i32 %1 +} + +define half @callee_half_ret() nounwind { +; RV32I-LABEL: callee_half_ret: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a0, 4 +; RV32I-NEXT: addi a0, a0, -1024 +; RV32I-NEXT: ret +; +; RV64I-LABEL: 
callee_half_ret: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a0, 4 +; RV64I-NEXT: addiw a0, a0, -1024 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: callee_half_ret: +; RV32IF: # %bb.0: +; RV32IF-NEXT: lui a0, 1048564 +; RV32IF-NEXT: addi a0, a0, -1024 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: callee_half_ret: +; RV64IF: # %bb.0: +; RV64IF-NEXT: lui a0, %hi(.LCPI4_0) +; RV64IF-NEXT: flw ft0, %lo(.LCPI4_0)(a0) +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32-ILP32F-LABEL: callee_half_ret: +; RV32-ILP32F: # %bb.0: +; RV32-ILP32F-NEXT: lui a0, %hi(.LCPI4_0) +; RV32-ILP32F-NEXT: flw fa0, %lo(.LCPI4_0)(a0) +; RV32-ILP32F-NEXT: ret +; +; RV64-LP64F-LABEL: callee_half_ret: +; RV64-LP64F: # %bb.0: +; RV64-LP64F-NEXT: lui a0, %hi(.LCPI4_0) +; RV64-LP64F-NEXT: flw fa0, %lo(.LCPI4_0)(a0) +; RV64-LP64F-NEXT: ret + ret half 1.0 +} + +define i32 @caller_half_ret() nounwind { +; RV32I-LABEL: caller_half_ret: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call callee_half_ret@plt +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: call __gnu_h2f_ieee@plt +; RV32I-NEXT: call __fixsfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_half_ret: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call callee_half_ret@plt +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: call __gnu_h2f_ieee@plt +; RV64I-NEXT: call __fixsfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: caller_half_ret: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call callee_half_ret@plt +; RV32IF-NEXT: call __gnu_h2f_ieee@plt +; RV32IF-NEXT: fmv.w.x ft0, 
a0 +; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: caller_half_ret: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: call callee_half_ret@plt +; RV64IF-NEXT: call __gnu_h2f_ieee@plt +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV32-ILP32F-LABEL: caller_half_ret: +; RV32-ILP32F: # %bb.0: +; RV32-ILP32F-NEXT: addi sp, sp, -16 +; RV32-ILP32F-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-ILP32F-NEXT: call callee_half_ret@plt +; RV32-ILP32F-NEXT: fmv.x.w a0, fa0 +; RV32-ILP32F-NEXT: call __gnu_h2f_ieee@plt +; RV32-ILP32F-NEXT: fcvt.w.s a0, fa0, rtz +; RV32-ILP32F-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-ILP32F-NEXT: addi sp, sp, 16 +; RV32-ILP32F-NEXT: ret +; +; RV64-LP64F-LABEL: caller_half_ret: +; RV64-LP64F: # %bb.0: +; RV64-LP64F-NEXT: addi sp, sp, -16 +; RV64-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-LP64F-NEXT: call callee_half_ret@plt +; RV64-LP64F-NEXT: fmv.x.w a0, fa0 +; RV64-LP64F-NEXT: call __gnu_h2f_ieee@plt +; RV64-LP64F-NEXT: fcvt.l.s a0, fa0, rtz +; RV64-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-LP64F-NEXT: addi sp, sp, 16 +; RV64-LP64F-NEXT: ret + %1 = call half @callee_half_ret() + %2 = fptosi half %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll --- a/llvm/test/CodeGen/RISCV/copysign-casts.ll +++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll @@ -124,22 +124,42 @@ ; RV32IF: # %bb.0: ; RV32IF-NEXT: fmv.x.w a2, fa0 ; RV32IF-NEXT: lui a3, 524288 -; RV32IF-NEXT: and a2, a2, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a1, a1, a3 +; RV32IF-NEXT: lui a3, 8 +; RV32IF-NEXT: and a2, a2, a3 +; RV32IF-NEXT: slli a2, a2, 16 ; 
RV32IF-NEXT: or a1, a1, a2 ; RV32IF-NEXT: ret ; ; RV32IFD-LABEL: fold_promote_d_h: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: fcvt.d.s ft0, fa1 -; RV32IFD-NEXT: fsgnj.d fa0, fa0, ft0 +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: fmv.d fs0, fa0 +; RV32IFD-NEXT: fmv.x.w a0, fa1 +; RV32IFD-NEXT: call __gnu_h2f_ieee@plt +; RV32IFD-NEXT: fcvt.d.s ft0, fa0 +; RV32IFD-NEXT: fsgnj.d fa0, fs0, ft0 +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fold_promote_d_h: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fcvt.d.s ft0, fa1 -; RV64IFD-NEXT: fsgnj.d fa0, fa0, ft0 +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: fmv.d fs0, fa0 +; RV64IFD-NEXT: fmv.x.w a0, fa1 +; RV64IFD-NEXT: call __gnu_h2f_ieee@plt +; RV64IFD-NEXT: fcvt.d.s ft0, fa0 +; RV64IFD-NEXT: fsgnj.d fa0, fs0, ft0 +; RV64IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret ; ; RV32IFZFH-LABEL: fold_promote_d_h: @@ -195,17 +215,44 @@ ; ; RV32IF-LABEL: fold_promote_f_h: ; RV32IF: # %bb.0: -; RV32IF-NEXT: fsgnj.s fa0, fa0, fa1 +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: fmv.x.w a0, fa1 +; RV32IF-NEXT: call __gnu_h2f_ieee@plt +; RV32IF-NEXT: fsgnj.s fa0, fs0, fa0 +; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 ; RV32IF-NEXT: ret ; ; RV32IFD-LABEL: fold_promote_f_h: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: fsgnj.s fa0, fa0, fa1 +; RV32IFD-NEXT: addi sp, sp, -16 +; 
RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: fmv.s fs0, fa0 +; RV32IFD-NEXT: fmv.x.w a0, fa1 +; RV32IFD-NEXT: call __gnu_h2f_ieee@plt +; RV32IFD-NEXT: fsgnj.s fa0, fs0, fa0 +; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fold_promote_f_h: ; RV64IFD: # %bb.0: -; RV64IFD-NEXT: fsgnj.s fa0, fa0, fa1 +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV64IFD-NEXT: fmv.s fs0, fa0 +; RV64IFD-NEXT: fmv.x.w a0, fa1 +; RV64IFD-NEXT: call __gnu_h2f_ieee@plt +; RV64IFD-NEXT: fsgnj.s fa0, fs0, fa0 +; RV64IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IFD-NEXT: addi sp, sp, 16 ; RV64IFD-NEXT: ret ; ; RV32IFZFH-LABEL: fold_promote_f_h: @@ -341,9 +388,13 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fsw fs0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: fmv.s fs0, fa1 -; RV32IF-NEXT: call __gnu_f2h_ieee@plt +; RV32IF-NEXT: fmv.x.w a0, fa0 ; RV32IF-NEXT: call __gnu_h2f_ieee@plt ; RV32IF-NEXT: fsgnj.s fa0, fa0, fs0 +; RV32IF-NEXT: call __gnu_f2h_ieee@plt +; RV32IF-NEXT: lui a1, 1048560 +; RV32IF-NEXT: or a0, a0, a1 +; RV32IF-NEXT: fmv.w.x fa0, a0 ; RV32IF-NEXT: flw fs0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -355,9 +406,13 @@ ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: fmv.s fs0, fa1 -; RV32IFD-NEXT: call __gnu_f2h_ieee@plt +; RV32IFD-NEXT: fmv.x.w a0, fa0 ; RV32IFD-NEXT: call __gnu_h2f_ieee@plt ; RV32IFD-NEXT: fsgnj.s fa0, fa0, fs0 +; RV32IFD-NEXT: call __gnu_f2h_ieee@plt +; RV32IFD-NEXT: lui a1, 1048560 +; RV32IFD-NEXT: or a0, a0, a1 +; RV32IFD-NEXT: 
fmv.w.x fa0, a0 ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 @@ -369,9 +424,13 @@ ; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV64IFD-NEXT: fmv.s fs0, fa1 -; RV64IFD-NEXT: call __gnu_f2h_ieee@plt +; RV64IFD-NEXT: fmv.x.w a0, fa0 ; RV64IFD-NEXT: call __gnu_h2f_ieee@plt ; RV64IFD-NEXT: fsgnj.s fa0, fa0, fs0 +; RV64IFD-NEXT: call __gnu_f2h_ieee@plt +; RV64IFD-NEXT: lui a1, 1048560 +; RV64IFD-NEXT: or a0, a0, a1 +; RV64IFD-NEXT: fmv.w.x fa0, a0 ; RV64IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16 @@ -451,10 +510,14 @@ ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IF-NEXT: mv s0, a1 -; RV32IF-NEXT: call __gnu_f2h_ieee@plt +; RV32IF-NEXT: fmv.x.w a0, fa0 ; RV32IF-NEXT: call __gnu_h2f_ieee@plt ; RV32IF-NEXT: fmv.w.x ft0, s0 ; RV32IF-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IF-NEXT: call __gnu_f2h_ieee@plt +; RV32IF-NEXT: lui a1, 1048560 +; RV32IF-NEXT: or a0, a0, a1 +; RV32IF-NEXT: fmv.w.x fa0, a0 ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -466,10 +529,14 @@ ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: fmv.d fs0, fa1 -; RV32IFD-NEXT: call __gnu_f2h_ieee@plt +; RV32IFD-NEXT: fmv.x.w a0, fa0 ; RV32IFD-NEXT: call __gnu_h2f_ieee@plt ; RV32IFD-NEXT: fcvt.s.d ft0, fs0 ; RV32IFD-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFD-NEXT: call __gnu_f2h_ieee@plt +; RV32IFD-NEXT: lui a1, 1048560 +; RV32IFD-NEXT: or a0, a0, a1 +; RV32IFD-NEXT: fmv.w.x fa0, a0 ; RV32IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: addi sp, sp, 16 @@ -481,10 +548,14 @@ ; RV64IFD-NEXT: sd ra, 
8(sp) # 8-byte Folded Spill ; RV64IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV64IFD-NEXT: fmv.d fs0, fa1 -; RV64IFD-NEXT: call __gnu_f2h_ieee@plt +; RV64IFD-NEXT: fmv.x.w a0, fa0 ; RV64IFD-NEXT: call __gnu_h2f_ieee@plt ; RV64IFD-NEXT: fcvt.s.d ft0, fs0 ; RV64IFD-NEXT: fsgnj.s fa0, fa0, ft0 +; RV64IFD-NEXT: call __gnu_f2h_ieee@plt +; RV64IFD-NEXT: lui a1, 1048560 +; RV64IFD-NEXT: or a0, a0, a1 +; RV64IFD-NEXT: fmv.w.x fa0, a0 ; RV64IFD-NEXT: fld fs0, 0(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IFD-NEXT: addi sp, sp, 16