diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -288,6 +288,7 @@
 HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2")
 HANDLE_LIBCALL(FPEXT_F16_F128, "__extendhftf2")
 HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2")
+HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2")
 HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee")
 HANDLE_LIBCALL(FPROUND_F32_F16, "__gnu_f2h_ieee")
 HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2")
@@ -302,6 +303,9 @@
 HANDLE_LIBCALL(FPROUND_F128_F64, "__trunctfdf2")
 HANDLE_LIBCALL(FPROUND_PPCF128_F64, "__gcc_qtod")
 HANDLE_LIBCALL(FPROUND_F128_F80, "__trunctfxf2")
+HANDLE_LIBCALL(FPTOSINT_F16_I32, "__fixhfsi")
+HANDLE_LIBCALL(FPTOSINT_F16_I64, "__fixhfdi")
+HANDLE_LIBCALL(FPTOSINT_F16_I128, "__fixhfti")
 HANDLE_LIBCALL(FPTOSINT_F32_I32, "__fixsfsi")
 HANDLE_LIBCALL(FPTOSINT_F32_I64, "__fixsfdi")
 HANDLE_LIBCALL(FPTOSINT_F32_I128, "__fixsfti")
@@ -317,6 +321,9 @@
 HANDLE_LIBCALL(FPTOSINT_PPCF128_I32, "__gcc_qtou")
 HANDLE_LIBCALL(FPTOSINT_PPCF128_I64, "__fixtfdi")
 HANDLE_LIBCALL(FPTOSINT_PPCF128_I128, "__fixtfti")
+HANDLE_LIBCALL(FPTOUINT_F16_I32, "__fixunshfsi")
+HANDLE_LIBCALL(FPTOUINT_F16_I64, "__fixunshfdi")
+HANDLE_LIBCALL(FPTOUINT_F16_I128, "__fixunshfti")
 HANDLE_LIBCALL(FPTOUINT_F32_I32, "__fixunssfsi")
 HANDLE_LIBCALL(FPTOUINT_F32_I64, "__fixunssfdi")
 HANDLE_LIBCALL(FPTOUINT_F32_I128, "__fixunssfti")
@@ -332,31 +339,37 @@
 HANDLE_LIBCALL(FPTOUINT_PPCF128_I32, "__fixunstfsi")
 HANDLE_LIBCALL(FPTOUINT_PPCF128_I64, "__fixunstfdi")
 HANDLE_LIBCALL(FPTOUINT_PPCF128_I128, "__fixunstfti")
+HANDLE_LIBCALL(SINTTOFP_I32_F16, "__floatsihf")
 HANDLE_LIBCALL(SINTTOFP_I32_F32, "__floatsisf")
 HANDLE_LIBCALL(SINTTOFP_I32_F64, "__floatsidf")
 HANDLE_LIBCALL(SINTTOFP_I32_F80, "__floatsixf")
 HANDLE_LIBCALL(SINTTOFP_I32_F128, "__floatsitf")
 HANDLE_LIBCALL(SINTTOFP_I32_PPCF128, "__gcc_itoq")
+HANDLE_LIBCALL(SINTTOFP_I64_F16, "__floatdihf")
 HANDLE_LIBCALL(SINTTOFP_I64_F32, "__floatdisf")
 HANDLE_LIBCALL(SINTTOFP_I64_F64, "__floatdidf")
 HANDLE_LIBCALL(SINTTOFP_I64_F80, "__floatdixf")
 HANDLE_LIBCALL(SINTTOFP_I64_F128, "__floatditf")
 HANDLE_LIBCALL(SINTTOFP_I64_PPCF128, "__floatditf")
+HANDLE_LIBCALL(SINTTOFP_I128_F16, "__floattihf")
 HANDLE_LIBCALL(SINTTOFP_I128_F32, "__floattisf")
 HANDLE_LIBCALL(SINTTOFP_I128_F64, "__floattidf")
 HANDLE_LIBCALL(SINTTOFP_I128_F80, "__floattixf")
 HANDLE_LIBCALL(SINTTOFP_I128_F128, "__floattitf")
 HANDLE_LIBCALL(SINTTOFP_I128_PPCF128, "__floattitf")
+HANDLE_LIBCALL(UINTTOFP_I32_F16, "__floatunsihf")
 HANDLE_LIBCALL(UINTTOFP_I32_F32, "__floatunsisf")
 HANDLE_LIBCALL(UINTTOFP_I32_F64, "__floatunsidf")
 HANDLE_LIBCALL(UINTTOFP_I32_F80, "__floatunsixf")
 HANDLE_LIBCALL(UINTTOFP_I32_F128, "__floatunsitf")
 HANDLE_LIBCALL(UINTTOFP_I32_PPCF128, "__gcc_utoq")
+HANDLE_LIBCALL(UINTTOFP_I64_F16, "__floatundihf")
 HANDLE_LIBCALL(UINTTOFP_I64_F32, "__floatundisf")
 HANDLE_LIBCALL(UINTTOFP_I64_F64, "__floatundidf")
 HANDLE_LIBCALL(UINTTOFP_I64_F80, "__floatundixf")
 HANDLE_LIBCALL(UINTTOFP_I64_F128, "__floatunditf")
 HANDLE_LIBCALL(UINTTOFP_I64_PPCF128, "__floatunditf")
+HANDLE_LIBCALL(UINTTOFP_I128_F16, "__floatuntihf")
 HANDLE_LIBCALL(UINTTOFP_I128_F32, "__floatuntisf")
 HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf")
 HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf")
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -224,6 +224,8
@@ if (OpVT == MVT::f16) { if (RetVT == MVT::f32) return FPEXT_F16_F32; + if (RetVT == MVT::f64) + return FPEXT_F16_F64; if (RetVT == MVT::f128) return FPEXT_F16_F128; } else if (OpVT == MVT::f32) { @@ -287,7 +289,14 @@ /// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { + if (OpVT == MVT::f16) { + if (RetVT == MVT::i32) + return FPTOSINT_F16_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F16_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F16_I128; + } else if (OpVT == MVT::f32) { if (RetVT == MVT::i32) return FPTOSINT_F32_I32; if (RetVT == MVT::i64) @@ -329,7 +338,14 @@ /// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { + if (OpVT == MVT::f16) { + if (RetVT == MVT::i32) + return FPTOUINT_F16_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F16_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F16_I128; + } else if (OpVT == MVT::f32) { if (RetVT == MVT::i32) return FPTOUINT_F32_I32; if (RetVT == MVT::i64) @@ -372,6 +388,8 @@ /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { + if (RetVT == MVT::f16) + return SINTTOFP_I32_F16; if (RetVT == MVT::f32) return SINTTOFP_I32_F32; if (RetVT == MVT::f64) @@ -383,6 +401,8 @@ if (RetVT == MVT::ppcf128) return SINTTOFP_I32_PPCF128; } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f16) + return SINTTOFP_I64_F16; if (RetVT == MVT::f32) return SINTTOFP_I64_F32; if (RetVT == MVT::f64) @@ -394,6 +414,8 @@ if (RetVT == MVT::ppcf128) return SINTTOFP_I64_PPCF128; } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f16) + return SINTTOFP_I128_F16; if (RetVT == MVT::f32) return SINTTOFP_I128_F32; if (RetVT == MVT::f64) @@ -412,6 +434,8 @@ /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { + if (RetVT == MVT::f16) + return UINTTOFP_I32_F16; if (RetVT == MVT::f32) return UINTTOFP_I32_F32; if (RetVT == MVT::f64) @@ -423,6 +447,8 @@ if (RetVT == MVT::ppcf128) return UINTTOFP_I32_PPCF128; } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f16) + return UINTTOFP_I64_F16; if (RetVT == MVT::f32) return UINTTOFP_I64_F32; if (RetVT == MVT::f64) @@ -434,6 +460,8 @@ if (RetVT == MVT::ppcf128) return UINTTOFP_I64_PPCF128; } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f16) + return UINTTOFP_I128_F16; if (RetVT == MVT::f32) return UINTTOFP_I128_F32; if (RetVT == MVT::f64) diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -982,6 +982,11 @@ #define GET_MNEMONIC_SPELL_CHECKER #include "RISCVGenAsmMatcher.inc" +static MCRegister convertFPR64ToFPR16(MCRegister Reg) { + assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register"); + return Reg - RISCV::F0_D + RISCV::F0_H; +} + static MCRegister convertFPR64ToFPR32(MCRegister Reg) { assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register"); return Reg - RISCV::F0_D + RISCV::F0_F; @@ -1006,6 +1011,12 @@ Op.Reg.RegNum = convertFPR64ToFPR32(Reg); return Match_Success; } + // As the parser couldn't differentiate an FPR16 from an FPR64, coerce the + // register from FPR64 to FPR16 if necessary. 
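+  // For example, in "fadd.h fa0, fa0, fa1" each operand first matches as the
+  // 64-bit F10_D/F11_D variant (the default FPR register class per the
+  // tablegen enum ordering) and is converted to F10_H/F11_H here.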
+  if (IsRegFPR64 && Kind == MCK_FPR16) {
+    Op.Reg.RegNum = convertFPR64ToFPR16(Reg);
+    return Match_Success;
+  }
   return Match_InvalidOperand;
 }
@@ -1237,10 +1248,12 @@
 static bool matchRegisterNameHelper(bool IsRV32E, MCRegister &RegNo,
                                     StringRef Name) {
   RegNo = MatchRegisterName(Name);
-  // The 32- and 64-bit FPRs have the same asm name. Check that the initial
-  // match always matches the 64-bit variant, and not the 32-bit one.
+  // The 16-/32- and 64-bit FPRs have the same asm name. Check that the initial
+  // match always matches the 64-bit variant, and not the 16/32-bit one.
+  assert(!(RegNo >= RISCV::F0_H && RegNo <= RISCV::F31_H));
   assert(!(RegNo >= RISCV::F0_F && RegNo <= RISCV::F31_F));
   // The default FPR register class is based on the tablegen enum ordering.
+  static_assert(RISCV::F0_D < RISCV::F0_H, "FPR matching must be updated");
   static_assert(RISCV::F0_D < RISCV::F0_F, "FPR matching must be updated");
   if (RegNo == RISCV::NoRegister)
     RegNo = MatchRegisterAltName(Name);
@@ -2414,6 +2427,9 @@
   case RISCV::PseudoLD:
     emitLoadStoreSymbol(Inst, RISCV::LD, IDLoc, Out, /*HasTmpReg=*/false);
     return false;
+  case RISCV::PseudoFLH:
+    emitLoadStoreSymbol(Inst, RISCV::FLH, IDLoc, Out, /*HasTmpReg=*/true);
+    return false;
   case RISCV::PseudoFLW:
     emitLoadStoreSymbol(Inst, RISCV::FLW, IDLoc, Out, /*HasTmpReg=*/true);
     return false;
@@ -2432,6 +2448,9 @@
   case RISCV::PseudoSD:
     emitLoadStoreSymbol(Inst, RISCV::SD, IDLoc, Out, /*HasTmpReg=*/true);
     return false;
+  case RISCV::PseudoFSH:
+    emitLoadStoreSymbol(Inst, RISCV::FSH, IDLoc, Out, /*HasTmpReg=*/true);
+    return false;
   case RISCV::PseudoFSW:
     emitLoadStoreSymbol(Inst, RISCV::FSW, IDLoc, Out, /*HasTmpReg=*/true);
     return false;
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -75,6 +75,17 @@
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint64_t RegNo,
+                                             uint64_t Address,
+                                             const void *Decoder) {
+  if (RegNo >= 32)
+    return MCDisassembler::Fail;
+
+  MCRegister Reg = RISCV::F0_H + RegNo;
+  Inst.addOperand(MCOperand::createReg(Reg));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
                                              uint64_t Address,
                                              const void *Decoder) {
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -41,6 +41,14 @@
                      AssemblerPredicate<(all_of FeatureStdExtD),
                                         "'D' (Double-Precision Floating-Point)">;
 
+def FeatureExtZfh
+    : SubtargetFeature<"experimental-zfh", "HasStdExtZfh", "true",
+                       "'Zfh' (Half-Precision Floating-Point)",
+                       [FeatureStdExtF]>;
+def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">,
+                   AssemblerPredicate<(all_of FeatureExtZfh),
+                                      "'Zfh' (Half-Precision Floating-Point)">;
+
 def FeatureStdExtC
     : SubtargetFeature<"c", "HasStdExtC", "true",
                        "'C' (Compressed Instructions)">;
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -662,14 +662,14 @@
     for (unsigned i = 0; CSRegs[i]; ++i)
       SavedRegs.set(CSRegs[i]);
 
-    if (MF.getSubtarget<RISCVSubtarget>().hasStdExtD() ||
-        MF.getSubtarget<RISCVSubtarget>().hasStdExtF()) {
+    if (MF.getSubtarget<RISCVSubtarget>().hasStdExtF()) {
 
       // If interrupt is enabled, this list contains all FP registers.
      const MCPhysReg * Regs = MF.getRegInfo().getCalleeSavedRegs();
      for (unsigned i = 0; Regs[i]; ++i)
-       if (RISCV::FPR32RegClass.contains(Regs[i]) ||
+       if (RISCV::FPR16RegClass.contains(Regs[i]) ||
+           RISCV::FPR32RegClass.contains(Regs[i]) ||
            RISCV::FPR64RegClass.contains(Regs[i]))
          SavedRegs.set(Regs[i]);
  }
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -404,6 +404,7 @@
    case RISCV::LHU:
    case RISCV::LWU:
    case RISCV::LD:
+   case RISCV::FLH:
    case RISCV::FLW:
    case RISCV::FLD:
      BaseOpIdx = 0;
@@ -413,6 +414,7 @@
    case RISCV::SH:
    case RISCV::SW:
    case RISCV::SD:
+   case RISCV::FSH:
    case RISCV::FSW:
    case RISCV::FSD:
      BaseOpIdx = 1;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -50,11 +50,21 @@
   // but the same operand order as fshl/fshr intrinsics.
   FSRW,
   FSLW,
-  // FPR32<->GPR transfer operations for RV64. Needed as an i32<->f32 bitcast
-  // is not legal on RV64. FMV_W_X_RV64 matches the semantics of the FMV.W.X.
+  // FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
+  // XLEN is the only legal integer width.
+  //
+  // FMV_H_X_RV32/RV64 match the semantics of the FMV.H.X.
+  // FMV_X_ANYEXTH_RV32/RV64 are similar to FMV.X.H but have an any-extended
+  // result.
+  // FMV_W_X_RV64 matches the semantics of the FMV.W.X.
   // FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
+  //
   // This is a more convenient semantic for producing dagcombines that remove
   // unnecessary GPR->FPR->GPR moves.
+  FMV_H_X_RV32,
+  FMV_H_X_RV64,
+  FMV_X_ANYEXTH_RV32,
+  FMV_X_ANYEXTH_RV64,
   FMV_W_X_RV64,
   FMV_X_ANYEXTW_RV64,
   // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -82,6 +82,8 @@
 
   // Set up the register classes.
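  // With f16 registered in FPR16, a plain "%1 = fadd half %a, %b" selects to
  // a single fadd.h (see half-arith.ll below) instead of being legalized via
  // __gnu_h2f_ieee/__gnu_f2h_ieee libcalls.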
addRegisterClass(XLenVT, &RISCV::GPRRegClass); + if (Subtarget.hasStdExtZfh()) + addRegisterClass(MVT::f16, &RISCV::FPR16RegClass); if (Subtarget.hasStdExtF()) addRegisterClass(MVT::f32, &RISCV::FPR32RegClass); if (Subtarget.hasStdExtD()) @@ -203,6 +205,21 @@ ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16}; + if (Subtarget.hasStdExtZfh()) + setOperationAction(ISD::BITCAST, MVT::i16, Custom); + + if (Subtarget.hasStdExtZfh()) { + setOperationAction(ISD::FMINNUM, MVT::f16, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f16, Legal); + for (auto CC : FPCCToExtend) + setCondCodeAction(CC, MVT::f16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); + setOperationAction(ISD::SELECT, MVT::f16, Custom); + setOperationAction(ISD::BR_CC, MVT::f16, Expand); + for (auto Op : FPOpToExtend) + setOperationAction(Op, MVT::f16, Expand); + } + if (Subtarget.hasStdExtF()) { setOperationAction(ISD::FMINNUM, MVT::f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); @@ -403,6 +420,8 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { + if (VT == MVT::f16 && !Subtarget.hasStdExtZfh()) + return false; if (VT == MVT::f32 && !Subtarget.hasStdExtF()) return false; if (VT == MVT::f64 && !Subtarget.hasStdExtD()) @@ -413,7 +432,8 @@ } bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { - return (VT == MVT::f32 && Subtarget.hasStdExtF()) || + return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) || + (VT == MVT::f32 && Subtarget.hasStdExtF()) || (VT == MVT::f64 && Subtarget.hasStdExtD()); } @@ -484,15 +504,33 @@ case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); case ISD::BITCAST: { - assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() && + assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) || + Subtarget.hasStdExtZfh()) && "Unexpected custom legalisation"); SDLoc DL(Op); SDValue Op0 = Op.getOperand(0); - if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32) - return SDValue(); - SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); - SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); - return FPConv; + if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) { + if (Op0.getValueType() != MVT::i16) + return SDValue(); + unsigned Opcode = RISCVISD::FMV_H_X_RV32; + EVT ExtType = MVT::i32; + if (Subtarget.is64Bit()) { + Opcode = RISCVISD::FMV_H_X_RV64; + ExtType = MVT::i64; + } + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, ExtType, Op0); + SDValue FPConv = DAG.getNode(Opcode, DL, MVT::f16, NewOp0); + return FPConv; + } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() && + Subtarget.hasStdExtF()) { + if (Op0.getValueType() != MVT::i32) + return SDValue(); + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); + SDValue FPConv = + DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); + return FPConv; + } + return SDValue(); } case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); @@ -1073,14 +1111,30 @@ Results.push_back(customLegalizeToWOp(N, DAG)); break; case ISD::BITCAST: { - assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && - Subtarget.hasStdExtF() && "Unexpected custom legalisation"); + assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtF()) || + (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) && + "Unexpected custom legalisation"); SDValue Op0 = N->getOperand(0); - if (Op0.getValueType() != MVT::f32) - 
return; - SDValue FPConv = - DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); - Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); + if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) { + if (Op0.getValueType() != MVT::f16) + return; + unsigned Opcode = RISCVISD::FMV_X_ANYEXTH_RV32; + EVT ExtType = MVT::i32; + if (Subtarget.is64Bit()) { + Opcode = RISCVISD::FMV_X_ANYEXTH_RV64; + ExtType = MVT::i64; + } + SDValue FPConv = DAG.getNode(Opcode, DL, ExtType, Op0); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv)); + } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtF()) { + if (Op0.getValueType() != MVT::f32) + return; + SDValue FPConv = + DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); + } break; } case RISCVISD::GREVI: @@ -1682,6 +1736,7 @@ default: return false; case RISCV::Select_GPR_Using_CC_GPR: + case RISCV::Select_FPR16_Using_CC_GPR: case RISCV::Select_FPR32_Using_CC_GPR: case RISCV::Select_FPR64_Using_CC_GPR: return true; @@ -1822,6 +1877,7 @@ "ReadCycleWrite is only to be used on riscv32"); return emitReadCycleWidePseudo(MI, BB); case RISCV::Select_GPR_Using_CC_GPR: + case RISCV::Select_FPR16_Using_CC_GPR: case RISCV::Select_FPR32_Using_CC_GPR: case RISCV::Select_FPR64_Using_CC_GPR: return emitSelectPseudo(MI, BB); @@ -1861,6 +1917,10 @@ RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 }; +static const MCPhysReg ArgFPR16s[] = { + RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, + RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H +}; static const MCPhysReg ArgFPR32s[] = { RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F @@ -1923,9 +1983,9 @@ if (IsRet && ValNo > 1) return true; - // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a - // variadic argument, or if no F32 argument registers are available. - bool UseGPRForF32 = true; + // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a + // variadic argument, or if no F16/F32 argument registers are available. + bool UseGPRForF16_F32 = true; // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a // variadic argument, or if no F64 argument registers are available. bool UseGPRForF64 = true; @@ -1938,24 +1998,26 @@ break; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: - UseGPRForF32 = !IsFixed; + UseGPRForF16_F32 = !IsFixed; break; case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64D: - UseGPRForF32 = !IsFixed; + UseGPRForF16_F32 = !IsFixed; UseGPRForF64 = !IsFixed; break; } - if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) - UseGPRForF32 = true; - if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s)) + // FPR16, FPR32, and FPR64 alias each other. + if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) { + UseGPRForF16_F32 = true; UseGPRForF64 = true; + } - // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local - // variables rather than directly checking against the target ABI. + // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and + // similar local variables rather than directly checking against the target + // ABI. 
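+  // For instance, with the ilp32/lp64 soft-float ABIs an f16 argument is
+  // bit-converted (BCvt) and passed in the low 16 bits of an XLen-sized GPR,
+  // exactly as f32 already was.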
- if (UseGPRForF32 && ValVT == MVT::f32) { + if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) { LocVT = XLenVT; LocInfo = CCValAssign::BCvt; } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { @@ -2038,7 +2100,9 @@ // Allocate to a register if possible, or else a stack slot. Register Reg; - if (ValVT == MVT::f32 && !UseGPRForF32) + if (ValVT == MVT::f16 && !UseGPRForF16_F32) + Reg = State.AllocateReg(ArgFPR16s); + else if (ValVT == MVT::f32 && !UseGPRForF16_F32) Reg = State.AllocateReg(ArgFPR32s); else if (ValVT == MVT::f64 && !UseGPRForF64) Reg = State.AllocateReg(ArgFPR64s); @@ -2065,7 +2129,7 @@ return false; } - assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) && + assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT) && "Expected an XLenVT at this stage"); if (Reg) { @@ -2073,8 +2137,9 @@ return false; } - // When an f32 or f64 is passed on the stack, no bit-conversion is needed. - if (ValVT == MVT::f32 || ValVT == MVT::f64) { + // When a floating-point value is passed on the stack, no bit-conversion is + // needed. + if (ValVT.isFloatingPoint()) { LocVT = ValVT; LocInfo = CCValAssign::Full; } @@ -2139,11 +2204,14 @@ case CCValAssign::Full: break; case CCValAssign::BCvt: - if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { + if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f16) + Val = DAG.getNode(RISCVISD::FMV_H_X_RV32, DL, MVT::f16, Val); + else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f16) + Val = DAG.getNode(RISCVISD::FMV_H_X_RV64, DL, MVT::f16, Val); + else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); - break; - } - Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); + else + Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; } return Val; @@ -2166,6 +2234,9 @@ case MVT::i64: RC = &RISCV::GPRRegClass; break; + case MVT::f16: + RC = &RISCV::FPR16RegClass; + break; case MVT::f32: RC = &RISCV::FPR32RegClass; break; @@ -2194,11 +2265,14 @@ case CCValAssign::Full: break; case CCValAssign::BCvt: - if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { + if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f16) + Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH_RV32, DL, MVT::i32, Val); + else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f16) + Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH_RV64, DL, MVT::i64, Val); + else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); - break; - } - Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); + else + Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); break; } return Val; @@ -2289,6 +2363,18 @@ } } + if (LocVT == MVT::f16) { + static const MCPhysReg FPR16List[] = { + RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H, + RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H, + RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H, + RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H}; + if (unsigned Reg = State.AllocateReg(FPR16List)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return false; + } + } + if (LocVT == MVT::f32) { static const MCPhysReg FPR32List[] = { RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F, @@ -3037,6 +3123,10 @@ NODE_NAME_CASE(RORW) NODE_NAME_CASE(FSLW) NODE_NAME_CASE(FSRW) + NODE_NAME_CASE(FMV_H_X_RV32) + NODE_NAME_CASE(FMV_H_X_RV64) + 
NODE_NAME_CASE(FMV_X_ANYEXTH_RV32)
+  NODE_NAME_CASE(FMV_X_ANYEXTH_RV64)
   NODE_NAME_CASE(FMV_W_X_RV64)
   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
   NODE_NAME_CASE(READ_CYCLE_WIDE)
@@ -3082,6 +3172,8 @@
     case 'r':
       return std::make_pair(0U, &RISCV::GPRRegClass);
     case 'f':
+      if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
+        return std::make_pair(0U, &RISCV::FPR16RegClass);
      if (Subtarget.hasStdExtF() && VT == MVT::f32)
        return std::make_pair(0U, &RISCV::FPR32RegClass);
      if (Subtarget.hasStdExtD() && VT == MVT::f64)
@@ -3140,7 +3232,7 @@
  //
  // The second case is the ABI name of the register, so that frontends can also
  // use the ABI names in register constraint lists.
-  if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) {
+  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -45,6 +45,7 @@
  case RISCV::LBU:
  case RISCV::LH:
  case RISCV::LHU:
+ case RISCV::FLH:
  case RISCV::LW:
  case RISCV::FLW:
  case RISCV::LWU:
@@ -70,6 +71,7 @@
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SW:
+ case RISCV::FSH:
  case RISCV::FSW:
  case RISCV::SD:
  case RISCV::FSD:
@@ -98,7 +100,9 @@
  // FPR->FPR copies
  unsigned Opc;
- if (RISCV::FPR32RegClass.contains(DstReg, SrcReg))
+ if (RISCV::FPR16RegClass.contains(DstReg, SrcReg))
+   Opc = RISCV::FSGNJ_H;
+ else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg))
    Opc = RISCV::FSGNJ_S;
  else if (RISCV::FPR64RegClass.contains(DstReg, SrcReg))
    Opc = RISCV::FSGNJ_D;
@@ -129,6 +133,8 @@
  if (RISCV::GPRRegClass.hasSubClassEq(RC))
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
             RISCV::SW : RISCV::SD;
+ else if (RISCV::FPR16RegClass.hasSubClassEq(RC))
+   Opcode = RISCV::FSH;
  else if (RISCV::FPR32RegClass.hasSubClassEq(RC))
    Opcode = RISCV::FSW;
  else if (RISCV::FPR64RegClass.hasSubClassEq(RC))
@@ -162,6 +168,8 @@
  if (RISCV::GPRRegClass.hasSubClassEq(RC))
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
             RISCV::LW : RISCV::LD;
+ else if (RISCV::FPR16RegClass.hasSubClassEq(RC))
+   Opcode = RISCV::FLH;
  else if (RISCV::FPR32RegClass.hasSubClassEq(RC))
    Opcode = RISCV::FLW;
  else if (RISCV::FPR64RegClass.hasSubClassEq(RC))
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1195,3 +1195,4 @@
 include "RISCVInstrInfoC.td"
 include "RISCVInstrInfoB.td"
 include "RISCVInstrInfoV.td"
+include "RISCVInstrInfoZfh.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -0,0 +1,398 @@
+//===-- RISCVInstrInfoZfh.td - RISC-V 'Zfh' instructions ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard 'Zfh'
+// half-precision floating-point extension, version 0.1.
+// This version is still experimental as the 'Zfh' extension hasn't been
+// ratified yet.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_RISCVFMV_H_X_RV32
+    : SDTypeProfile<1, 1, [SDTCisVT<0, f16>, SDTCisVT<1, i32>]>;
+def SDT_RISCVFMV_H_X_RV64
+    : SDTypeProfile<1, 1, [SDTCisVT<0, f16>, SDTCisVT<1, i64>]>;
+def SDT_RISCVFMV_X_ANYEXTH_RV64
+    : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f16>]>;
+def SDT_RISCVFMV_X_ANYEXTH_RV32
+    : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, f16>]>;
+
+def riscv_fmv_h_x_rv32
+    : SDNode<"RISCVISD::FMV_H_X_RV32", SDT_RISCVFMV_H_X_RV32>;
+def riscv_fmv_h_x_rv64
+    : SDNode<"RISCVISD::FMV_H_X_RV64", SDT_RISCVFMV_H_X_RV64>;
+def riscv_fmv_x_anyexth_rv64
+    : SDNode<"RISCVISD::FMV_X_ANYEXTH_RV64", SDT_RISCVFMV_X_ANYEXTH_RV64>;
+def riscv_fmv_x_anyexth_rv32
+    : SDNode<"RISCVISD::FMV_X_ANYEXTH_RV32", SDT_RISCVFMV_X_ANYEXTH_RV32>;
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class FPFMAH_rrr_frm<RISCVOpcode opcode, string opcodestr>
+    : RVInstR4<0b10, opcode, (outs FPR16:$rd),
+               (ins FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, frmarg:$funct3),
+               opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">;
+
+class FPFMAHDynFrmAlias<FPFMAH_rrr_frm Inst, string OpcodeStr>
+    : InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3",
+                (Inst FPR16:$rd, FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class FPALUH_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
+    : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR16:$rd),
+              (ins FPR16:$rs1, FPR16:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class FPALUH_rr_frm<bits<7> funct7, string opcodestr>
+    : RVInstRFrm<funct7, OPC_OP_FP, (outs FPR16:$rd),
+                 (ins FPR16:$rs1, FPR16:$rs2, frmarg:$funct3), opcodestr,
+                 "$rd, $rs1, $rs2, $funct3">;
+
+class FPALUHDynFrmAlias<FPALUH_rr_frm Inst, string OpcodeStr>
+    : InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
+                (Inst FPR16:$rd, FPR16:$rs1, FPR16:$rs2, 0b111)>;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class FPCmpH_rr<bits<3> funct3, string opcodestr>
+    : RVInstR<0b1010010, funct3, OPC_OP_FP, (outs GPR:$rd),
+              (ins FPR16:$rs1, FPR16:$rs2), opcodestr, "$rd, $rs1, $rs2">,
+      Sched<[]>;
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZfh] in {
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def FLH : RVInstI<0b001, OPC_LOAD_FP, (outs FPR16:$rd),
+                  (ins GPR:$rs1, simm12:$imm12),
+                  "flh", "$rd, ${imm12}(${rs1})">,
+          Sched<[]>;
+
+// Operands for stores are in the order srcreg, base, offset rather than
+// reflecting the order these fields are specified in the instruction
+// encoding.
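+// For example, in "fsh ft0, 8(a0)" the stored value ft0 is $rs2 and the base
+// a0 is $rs1, whereas the S-type encoding lays the fields out as
+// imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode.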
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+def FSH : RVInstS<0b001, OPC_STORE_FP, (outs),
+                  (ins FPR16:$rs2, GPR:$rs1, simm12:$imm12),
+                  "fsh", "$rs2, ${imm12}(${rs1})">,
+          Sched<[]>;
+
+def FMADD_H : FPFMAH_rrr_frm<OPC_MADD, "fmadd.h">,
+              Sched<[]>;
+def : FPFMAHDynFrmAlias<FMADD_H, "fmadd.h">;
+def FMSUB_H : FPFMAH_rrr_frm<OPC_MSUB, "fmsub.h">,
+              Sched<[]>;
+def : FPFMAHDynFrmAlias<FMSUB_H, "fmsub.h">;
+def FNMSUB_H : FPFMAH_rrr_frm<OPC_NMSUB, "fnmsub.h">,
+               Sched<[]>;
+def : FPFMAHDynFrmAlias<FNMSUB_H, "fnmsub.h">;
+def FNMADD_H : FPFMAH_rrr_frm<OPC_NMADD, "fnmadd.h">,
+               Sched<[]>;
+def : FPFMAHDynFrmAlias<FNMADD_H, "fnmadd.h">;
+
+def FADD_H : FPALUH_rr_frm<0b0000010, "fadd.h">,
+             Sched<[]>;
+def : FPALUHDynFrmAlias<FADD_H, "fadd.h">;
+def FSUB_H : FPALUH_rr_frm<0b0000110, "fsub.h">,
+             Sched<[]>;
+def : FPALUHDynFrmAlias<FSUB_H, "fsub.h">;
+def FMUL_H : FPALUH_rr_frm<0b0001010, "fmul.h">,
+             Sched<[]>;
+def : FPALUHDynFrmAlias<FMUL_H, "fmul.h">;
+def FDIV_H : FPALUH_rr_frm<0b0001110, "fdiv.h">,
+             Sched<[]>;
+def : FPALUHDynFrmAlias<FDIV_H, "fdiv.h">;
+
+def FSQRT_H : FPUnaryOp_r_frm<0b0101110, FPR16, FPR16, "fsqrt.h">,
+              Sched<[]> {
+  let rs2 = 0b00000;
+}
+def : FPUnaryOpDynFrmAlias<FSQRT_H, "fsqrt.h", FPR16, FPR16>;
+
+def FSGNJ_H : FPALUH_rr<0b0010010, 0b000, "fsgnj.h">,
+              Sched<[]>;
+def FSGNJN_H : FPALUH_rr<0b0010010, 0b001, "fsgnjn.h">,
+               Sched<[]>;
+def FSGNJX_H : FPALUH_rr<0b0010010, 0b010, "fsgnjx.h">,
+               Sched<[]>;
+
+def FMIN_H : FPALUH_rr<0b0010110, 0b000, "fmin.h">,
+             Sched<[]>;
+def FMAX_H : FPALUH_rr<0b0010110, 0b001, "fmax.h">,
+             Sched<[]>;
+
+def FCVT_W_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.w.h">,
+               Sched<[]> {
+  let rs2 = 0b00000;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_W_H, "fcvt.w.h", GPR, FPR16>;
+
+def FCVT_WU_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.wu.h">,
+                Sched<[]> {
+  let rs2 = 0b00001;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_WU_H, "fcvt.wu.h", GPR, FPR16>;
+
+def FCVT_H_W : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.w">,
+               Sched<[]> {
+  let rs2 = 0b00000;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_W, "fcvt.h.w", FPR16, GPR>;
+
+def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.wu">,
+                Sched<[]> {
+  let rs2 = 0b00001;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_WU, "fcvt.h.wu", FPR16, GPR>;
+
+def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, FPR16, FPR32, "fcvt.h.s">,
+               Sched<[]> {
+  let rs2 = 0b00000;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_S, "fcvt.h.s", FPR16, FPR32>;
+
+def FCVT_S_H : FPUnaryOp_r<0b0100000, 0b000, FPR32, FPR16, "fcvt.s.h">,
+               Sched<[]> {
+  let rs2 = 0b00010;
+}
+
+def FMV_X_H : FPUnaryOp_r<0b1110010, 0b000, GPR, FPR16, "fmv.x.h">,
+              Sched<[]> {
+  let rs2 = 0b00000;
+}
+
+def FMV_H_X : FPUnaryOp_r<0b1111010, 0b000, FPR16, GPR, "fmv.h.x">,
+              Sched<[]> {
+  let rs2 = 0b00000;
+}
+
+def FEQ_H : FPCmpH_rr<0b010, "feq.h">;
+def FLT_H : FPCmpH_rr<0b001, "flt.h">;
+def FLE_H : FPCmpH_rr<0b000, "fle.h">;
+
+def FCLASS_H : FPUnaryOp_r<0b1110010, 0b001, GPR, FPR16, "fclass.h">,
+               Sched<[]> {
+  let rs2 = 0b00000;
+}
+} // Predicates = [HasStdExtZfh]
+
+let Predicates = [HasStdExtZfh, IsRV64] in {
+def FCVT_L_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.l.h">,
+               Sched<[]> {
+  let rs2 = 0b00010;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_L_H, "fcvt.l.h", GPR, FPR16>;
+
+def FCVT_LU_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.lu.h">,
+                Sched<[]> {
+  let rs2 = 0b00011;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_LU_H, "fcvt.lu.h", GPR, FPR16>;
+
+def FCVT_H_L : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.l">,
+               Sched<[]> {
+  let rs2 = 0b00010;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_L, "fcvt.h.l", FPR16, GPR>;
+
+def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.lu">,
+                Sched<[]> {
+  let rs2 = 0b00011;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>;
+} // Predicates = [HasStdExtZfh, IsRV64]
+
+let Predicates = [HasStdExtZfh, HasStdExtD] in {
+def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, FPR16, FPR64, "fcvt.h.d">,
+               Sched<[]> {
+  let rs2 = 0b00001;
+}
+def : FPUnaryOpDynFrmAlias<FCVT_H_D, "fcvt.h.d", FPR16, FPR64>;
+
+def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR16, "fcvt.d.h">,
+               Sched<[]> {
+  let rs2 = 0b00010;
+}
+}
// Predicates = [HasStdExtZfh, HasStdExtD]
+
+//===----------------------------------------------------------------------===//
+// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZfh] in {
+def : InstAlias<"flh $rd, (${rs1})", (FLH FPR16:$rd, GPR:$rs1, 0), 0>;
+def : InstAlias<"fsh $rs2, (${rs1})", (FSH FPR16:$rs2, GPR:$rs1, 0), 0>;
+
+def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
+def : InstAlias<"fabs.h $rd, $rs", (FSGNJX_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
+def : InstAlias<"fneg.h $rd, $rs", (FSGNJN_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
+
+// fgt.h/fge.h are recognised by the GNU assembler but the canonical
+// flt.h/fle.h forms will always be printed. Therefore, set a zero weight.
+def : InstAlias<"fgt.h $rd, $rs, $rt",
+                (FLT_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
+def : InstAlias<"fge.h $rd, $rs, $rt",
+                (FLE_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
+
+def PseudoFLH : PseudoFloatLoad<"flh", FPR16>;
+def PseudoFSH : PseudoStore<"fsh", FPR16>;
+} // Predicates = [HasStdExtZfh]
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
+
+/// Generic pattern classes
+class PatFpr16Fpr16<SDPatternOperator OpNode, RVInstR Inst>
+    : Pat<(OpNode FPR16:$rs1, FPR16:$rs2), (Inst $rs1, $rs2)>;
+
+class PatFpr16Fpr16DynFrm<SDPatternOperator OpNode, RVInstRFrm Inst>
+    : Pat<(OpNode FPR16:$rs1, FPR16:$rs2), (Inst $rs1, $rs2, 0b111)>;
+
+let Predicates = [HasStdExtZfh] in {
+
+/// Float constants
+def : Pat<(f16 (fpimm0)), (FMV_H_X X0)>;
+
+/// Float conversion operations
+
+// [u]int32<->float conversion patterns must be gated on IsRV32 or IsRV64, so
+// are defined later.
+
+/// Float arithmetic operations
+
+def : PatFpr16Fpr16DynFrm<fadd, FADD_H>;
+def : PatFpr16Fpr16DynFrm<fsub, FSUB_H>;
+def : PatFpr16Fpr16DynFrm<fmul, FMUL_H>;
+def : PatFpr16Fpr16DynFrm<fdiv, FDIV_H>;
+
+def : Pat<(fsqrt FPR16:$rs1), (FSQRT_H FPR16:$rs1, 0b111)>;
+
+def : Pat<(fneg FPR16:$rs1), (FSGNJN_H $rs1, $rs1)>;
+def : Pat<(fabs FPR16:$rs1), (FSGNJX_H $rs1, $rs1)>;
+
+def : PatFpr16Fpr16<fcopysign, FSGNJ_H>;
+def : Pat<(fcopysign FPR16:$rs1, (fneg FPR16:$rs2)), (FSGNJN_H $rs1, $rs2)>;
+def : Pat<(fcopysign FPR16:$rs1, FPR32:$rs2),
+          (FSGNJ_H $rs1, (FCVT_H_S $rs2, 0b111))>;
+def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2),
+          (FSGNJ_H $rs1, (FCVT_H_D $rs2, 0b111))>;
+def : Pat<(fcopysign FPR32:$rs1, FPR16:$rs2), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>;
+def : Pat<(fcopysign FPR64:$rs1, FPR16:$rs2), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>;
+
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(fma FPR16:$rs1, FPR16:$rs2, FPR16:$rs3),
+          (FMADD_H $rs1, $rs2, $rs3, 0b111)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(fma FPR16:$rs1, FPR16:$rs2, (fneg FPR16:$rs3)),
+          (FMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(fma (fneg FPR16:$rs1), FPR16:$rs2, FPR16:$rs3),
+          (FNMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3)),
+          (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+
+def : PatFpr16Fpr16<fminnum, FMIN_H>;
+def : PatFpr16Fpr16<fmaxnum, FMAX_H>;
+
+/// Setcc
+
+def : PatFpr16Fpr16<seteq, FEQ_H>;
+def : PatFpr16Fpr16<setoeq, FEQ_H>;
+def : PatFpr16Fpr16<setlt, FLT_H>;
+def : PatFpr16Fpr16<setolt, FLT_H>;
+def : PatFpr16Fpr16<setle, FLE_H>;
+def : PatFpr16Fpr16<setole, FLE_H>;
+
+// Define pattern expansions for setcc operations which aren't directly
+// handled by a RISC-V instruction and aren't expanded in the SelectionDAG
+// Legalizer.
+
+def : Pat<(seto FPR16:$rs1, FPR16:$rs2),
+          (AND (FEQ_H FPR16:$rs1, FPR16:$rs1),
+               (FEQ_H FPR16:$rs2, FPR16:$rs2))>;
+def : Pat<(seto FPR16:$rs1, FPR16:$rs1),
+          (FEQ_H $rs1, $rs1)>;
+
+def : Pat<(setuo FPR16:$rs1, FPR16:$rs2),
+          (SLTIU (AND (FEQ_H FPR16:$rs1, FPR16:$rs1),
+                      (FEQ_H FPR16:$rs2, FPR16:$rs2)),
+                 1)>;
+def : Pat<(setuo FPR16:$rs1, FPR16:$rs1),
+          (SLTIU (FEQ_H $rs1, $rs1), 1)>;
+
+def Select_FPR16_Using_CC_GPR : SelectCC_rrirr<FPR16, GPR>;
+
+/// Loads
+
+defm : LdPat<load, FLH>;
+
+/// Stores
+
+defm : StPat<store, FSH, FPR16>;
+
+/// Float conversion operations
+// f32 -> f16, f16 -> f32
+def : Pat<(fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>;
+def : Pat<(fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
+
+} // Predicates = [HasStdExtZfh]
+
+let Predicates = [HasStdExtZfh, IsRV32] in {
+def : Pat<(riscv_fmv_h_x_rv32 GPR:$src), (FMV_H_X GPR:$src)>;
+def : Pat<(riscv_fmv_x_anyexth_rv32 FPR16:$src), (FMV_X_H FPR16:$src)>;
+
+// float->[u]int. Round-to-zero must be used.
+def : Pat<(fp_to_sint FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
+
+// [u]int->float. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_H_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_H_WU $rs1, 0b111)>;
+} // Predicates = [HasStdExtZfh, IsRV32]
+
+let Predicates = [HasStdExtZfh, IsRV64] in {
+def : Pat<(riscv_fmv_h_x_rv64 GPR:$src), (FMV_H_X GPR:$src)>;
+def : Pat<(riscv_fmv_x_anyexth_rv64 FPR16:$src), (FMV_X_H FPR16:$src)>;
+
+// FP->[u]int32 is mostly handled by the FP->[u]int64 patterns. This is safe
+// because fpto[u|s]i produces poison if the value can't fit into the target.
+// We match the single case below because fcvt.wu.h sign-extends its result so
+// is cheaper than fcvt.lu.h+sext.w.
+def : Pat<(sext_inreg (assertzexti32 (fp_to_uint FPR16:$rs1)), i32),
+          (FCVT_WU_H $rs1, 0b001)>;
+
+// FP->[u]int64
+def : Pat<(fp_to_sint FPR16:$rs1), (FCVT_L_H $rs1, 0b001)>;
+def : Pat<(fp_to_uint FPR16:$rs1), (FCVT_LU_H $rs1, 0b001)>;
+
+// [u]int->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(sint_to_fp (sext_inreg GPR:$rs1, i32)), (FCVT_H_W $rs1, 0b111)>;
+def : Pat<(uint_to_fp (zexti32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>;
+def : Pat<(sint_to_fp GPR:$rs1), (FCVT_H_L $rs1, 0b111)>;
+def : Pat<(uint_to_fp GPR:$rs1), (FCVT_H_LU $rs1, 0b111)>;
+} // Predicates = [HasStdExtZfh, IsRV64]
+
+let Predicates = [HasStdExtZfh, HasStdExtD] in {
+/// Float conversion operations
+// f64 -> f16, f16 -> f64
+def : Pat<(fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>;
+def : Pat<(fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -216,12 +216,14 @@
  case RISCV::LHU:
  case RISCV::LWU:
  case RISCV::LD:
+ case RISCV::FLH:
  case RISCV::FLW:
  case RISCV::FLD:
  case RISCV::SB:
  case RISCV::SH:
  case RISCV::SW:
  case RISCV::SD:
+ case RISCV::FSH:
  case RISCV::FSW:
  case RISCV::FSD: {
    // Transforms the sequence:  Into:
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -29,6 +29,9 @@
 static_assert(RISCV::X1 == RISCV::X0 + 1, "Register list not consecutive");
 static_assert(RISCV::X31 == RISCV::X0 + 31, "Register list not consecutive");
+static_assert(RISCV::F1_H == RISCV::F0_H + 1, "Register list not consecutive");
+static_assert(RISCV::F31_H == RISCV::F0_H + 31,
+              "Register list not consecutive");
 static_assert(RISCV::F1_F == RISCV::F0_F + 1, "Register list not consecutive");
 static_assert(RISCV::F31_F == RISCV::F0_F + 31,
               "Register list not consecutive");
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -16,14 +16,23 @@
   let AltNames = alt;
 }
-class RISCVReg32<bits<5> Enc, string n, list<string> alt = []> : Register<n> {
+class RISCVReg16<bits<5> Enc, string n, list<string> alt = []> : Register<n> {
   let HWEncoding{4-0} = Enc;
   let AltNames = alt;
 }
+def sub_16 : SubRegIndex<16>;
+class RISCVReg32<RISCVReg16 subreg> : Register<""> {
+  let HWEncoding{4-0} = subreg.HWEncoding{4-0};
+  let SubRegs = [subreg];
+  let SubRegIndices = [sub_16];
+  let AsmName = subreg.AsmName;
+  let AltNames = subreg.AltNames;
+}
+
 // Because RISCVReg64 registers have AsmName and AltNames that alias with their
-// 32-bit sub-register, RISCVAsmParser will need to coerce a register number
-// from a RISCVReg32 to the equivalent RISCVReg64 when appropriate.
+// 16/32-bit sub-register, RISCVAsmParser will need to coerce a register number
+// from a RISCVReg16/RISCVReg32 to the equivalent RISCVReg64 when appropriate.
 def sub_32 : SubRegIndex<32>;
 class RISCVReg64<RISCVReg32 subreg> : Register<""> {
   let HWEncoding{4-0} = subreg.HWEncoding{4-0};
   let SubRegs = [subreg];
@@ -179,38 +188,43 @@
 // Floating point registers
 let RegAltNameIndices = [ABIRegAltName] in {
-  def F0_F  : RISCVReg32<0, "f0", ["ft0"]>, DwarfRegNum<[32]>;
-  def F1_F  : RISCVReg32<1, "f1", ["ft1"]>, DwarfRegNum<[33]>;
-  def F2_F  : RISCVReg32<2, "f2", ["ft2"]>, DwarfRegNum<[34]>;
-  def F3_F  : RISCVReg32<3, "f3", ["ft3"]>, DwarfRegNum<[35]>;
-  def F4_F  : RISCVReg32<4, "f4", ["ft4"]>, DwarfRegNum<[36]>;
-  def F5_F  : RISCVReg32<5, "f5", ["ft5"]>, DwarfRegNum<[37]>;
-  def F6_F  : RISCVReg32<6, "f6", ["ft6"]>, DwarfRegNum<[38]>;
-  def F7_F  : RISCVReg32<7, "f7", ["ft7"]>, DwarfRegNum<[39]>;
-  def F8_F  : RISCVReg32<8, "f8", ["fs0"]>, DwarfRegNum<[40]>;
-  def F9_F  : RISCVReg32<9, "f9", ["fs1"]>, DwarfRegNum<[41]>;
-  def F10_F : RISCVReg32<10,"f10", ["fa0"]>, DwarfRegNum<[42]>;
-  def F11_F : RISCVReg32<11,"f11", ["fa1"]>, DwarfRegNum<[43]>;
-  def F12_F : RISCVReg32<12,"f12", ["fa2"]>, DwarfRegNum<[44]>;
-  def F13_F : RISCVReg32<13,"f13", ["fa3"]>, DwarfRegNum<[45]>;
-  def F14_F : RISCVReg32<14,"f14", ["fa4"]>, DwarfRegNum<[46]>;
-  def F15_F : RISCVReg32<15,"f15", ["fa5"]>, DwarfRegNum<[47]>;
-  def F16_F : RISCVReg32<16,"f16", ["fa6"]>, DwarfRegNum<[48]>;
-  def F17_F : RISCVReg32<17,"f17", ["fa7"]>, DwarfRegNum<[49]>;
-  def F18_F : RISCVReg32<18,"f18", ["fs2"]>, DwarfRegNum<[50]>;
-  def F19_F : RISCVReg32<19,"f19", ["fs3"]>, DwarfRegNum<[51]>;
-  def F20_F : RISCVReg32<20,"f20", ["fs4"]>, DwarfRegNum<[52]>;
-  def F21_F : RISCVReg32<21,"f21", ["fs5"]>, DwarfRegNum<[53]>;
-  def F22_F : RISCVReg32<22,"f22", ["fs6"]>, DwarfRegNum<[54]>;
-  def F23_F : RISCVReg32<23,"f23", ["fs7"]>, DwarfRegNum<[55]>;
-  def F24_F : RISCVReg32<24,"f24", ["fs8"]>, DwarfRegNum<[56]>;
-  def F25_F : RISCVReg32<25,"f25", ["fs9"]>, DwarfRegNum<[57]>;
-  def F26_F : RISCVReg32<26,"f26", ["fs10"]>, DwarfRegNum<[58]>;
-  def F27_F : RISCVReg32<27,"f27", ["fs11"]>, DwarfRegNum<[59]>;
-  def F28_F : RISCVReg32<28,"f28", ["ft8"]>, DwarfRegNum<[60]>;
-  def F29_F : RISCVReg32<29,"f29", ["ft9"]>, DwarfRegNum<[61]>;
-  def F30_F : RISCVReg32<30,"f30", ["ft10"]>, DwarfRegNum<[62]>;
-  def F31_F : RISCVReg32<31,"f31", ["ft11"]>, DwarfRegNum<[63]>;
+  def F0_H  : RISCVReg16<0, "f0", ["ft0"]>, DwarfRegNum<[32]>;
+  def F1_H  : RISCVReg16<1, "f1", ["ft1"]>, DwarfRegNum<[33]>;
+  def F2_H  : RISCVReg16<2, "f2", ["ft2"]>, DwarfRegNum<[34]>;
+  def F3_H  : RISCVReg16<3, "f3", ["ft3"]>, DwarfRegNum<[35]>;
+  def F4_H  : RISCVReg16<4, "f4", ["ft4"]>, DwarfRegNum<[36]>;
+  def F5_H  : RISCVReg16<5, "f5", ["ft5"]>, DwarfRegNum<[37]>;
+  def F6_H  : RISCVReg16<6, "f6", ["ft6"]>, DwarfRegNum<[38]>;
+  def F7_H  : RISCVReg16<7, "f7", ["ft7"]>, DwarfRegNum<[39]>;
+  def F8_H  : RISCVReg16<8, "f8", ["fs0"]>, DwarfRegNum<[40]>;
+  def F9_H  : RISCVReg16<9, "f9", ["fs1"]>, DwarfRegNum<[41]>;
+  def F10_H : RISCVReg16<10,"f10", ["fa0"]>, DwarfRegNum<[42]>;
+  def F11_H : RISCVReg16<11,"f11", ["fa1"]>, DwarfRegNum<[43]>;
+  def F12_H : RISCVReg16<12,"f12", ["fa2"]>, DwarfRegNum<[44]>;
+  def F13_H : RISCVReg16<13,"f13", ["fa3"]>, DwarfRegNum<[45]>;
+  def F14_H : RISCVReg16<14,"f14", ["fa4"]>, DwarfRegNum<[46]>;
+  def F15_H : RISCVReg16<15,"f15", ["fa5"]>, DwarfRegNum<[47]>;
+  def F16_H : RISCVReg16<16,"f16", ["fa6"]>, DwarfRegNum<[48]>;
+  def F17_H : RISCVReg16<17,"f17", ["fa7"]>, DwarfRegNum<[49]>;
+  def F18_H : RISCVReg16<18,"f18", ["fs2"]>, DwarfRegNum<[50]>;
+  def F19_H : RISCVReg16<19,"f19", ["fs3"]>, DwarfRegNum<[51]>;
+  def F20_H : RISCVReg16<20,"f20", ["fs4"]>,
DwarfRegNum<[52]>;
+  def F21_H : RISCVReg16<21,"f21", ["fs5"]>, DwarfRegNum<[53]>;
+  def F22_H : RISCVReg16<22,"f22", ["fs6"]>, DwarfRegNum<[54]>;
+  def F23_H : RISCVReg16<23,"f23", ["fs7"]>, DwarfRegNum<[55]>;
+  def F24_H : RISCVReg16<24,"f24", ["fs8"]>, DwarfRegNum<[56]>;
+  def F25_H : RISCVReg16<25,"f25", ["fs9"]>, DwarfRegNum<[57]>;
+  def F26_H : RISCVReg16<26,"f26", ["fs10"]>, DwarfRegNum<[58]>;
+  def F27_H : RISCVReg16<27,"f27", ["fs11"]>, DwarfRegNum<[59]>;
+  def F28_H : RISCVReg16<28,"f28", ["ft8"]>, DwarfRegNum<[60]>;
+  def F29_H : RISCVReg16<29,"f29", ["ft9"]>, DwarfRegNum<[61]>;
+  def F30_H : RISCVReg16<30,"f30", ["ft10"]>, DwarfRegNum<[62]>;
+  def F31_H : RISCVReg16<31,"f31", ["ft11"]>, DwarfRegNum<[63]>;
+
+  foreach Index = 0-31 in {
+    def F#Index#_F : RISCVReg32<!cast<RISCVReg16>("F"#Index#"_H")>,
+      DwarfRegNum<[!add(Index, 32)]>;
+  }

   foreach Index = 0-31 in {
     def F#Index#_D : RISCVReg64<!cast<RISCVReg32>("F"#Index#"_F")>,
@@ -220,6 +234,14 @@
 // The order of registers represents the preferred allocation sequence,
 // meaning caller-save regs are listed before callee-save.
+def FPR16 : RegisterClass<"RISCV", [f16], 16, (add
+    (sequence "F%u_H", 0, 7),
+    (sequence "F%u_H", 10, 17),
+    (sequence "F%u_H", 28, 31),
+    (sequence "F%u_H", 8, 9),
+    (sequence "F%u_H", 18, 27)
+)>;
+
 def FPR32 : RegisterClass<"RISCV", [f32], 32, (add
     (sequence "F%u_F", 0, 7),
     (sequence "F%u_F", 10, 17),
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -53,6 +53,7 @@
   bool HasStdExtV = false;
   bool HasStdExtZvlsseg = false;
   bool HasStdExtZvamo = false;
+  bool HasStdExtZfh = false;
   bool HasRV64 = false;
   bool IsRV32E = false;
   bool EnableLinkerRelax = false;
@@ -118,6 +119,7 @@
   bool hasStdExtV() const { return HasStdExtV; }
   bool hasStdExtZvlsseg() const { return HasStdExtZvlsseg; }
   bool hasStdExtZvamo() const { return HasStdExtZvamo; }
+  bool hasStdExtZfh() const { return HasStdExtZfh; }
   bool is64Bit() const { return HasRV64; }
   bool isRV32E() const { return IsRV32E; }
   bool enableLinkerRelax() const { return EnableLinkerRelax; }
diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll
--- a/llvm/test/CodeGen/RISCV/copysign-casts.ll
+++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll
@@ -9,15 +9,25 @@
 ; RUN:   -target-abi ilp32d < %s | FileCheck %s -check-prefix=RV32IFD
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \
 ; RUN:   -target-abi lp64d < %s | FileCheck %s -check-prefix=RV64IFD
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f \
+; RUN:   -mattr=+experimental-zfh -target-abi ilp32f < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IFZFH
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f -mattr=+d \
+; RUN:   -mattr=+experimental-zfh -target-abi ilp32d < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32IFDZFH
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \
+; RUN:   -mattr=+experimental-zfh -target-abi lp64d < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64IFDZFH

; Test fcopysign scenarios where the sign argument is cast to the type of the
; magnitude argument. Those casts can be folded away by the DAGCombiner.
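; For instance, fold_promote_d_h below shows (fpext half) feeding copysign
; folding to fcvt.d.h + fsgnj.d when both Zfh and D are available, and to
; integer masking and shifting of the sign bit when they are not.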
declare double @llvm.copysign.f64(double, double) declare float @llvm.copysign.f32(float, float) +declare half @llvm.copysign.f16(half, half) -define double @fold_promote(double %a, float %b) nounwind { -; RV32I-LABEL: fold_promote: +define double @fold_promote_d_s(double %a, float %b) nounwind { +; RV32I-LABEL: fold_promote_d_s: ; RV32I: # %bb.0: ; RV32I-NEXT: lui a3, 524288 ; RV32I-NEXT: and a2, a2, a3 @@ -26,7 +36,7 @@ ; RV32I-NEXT: or a1, a1, a2 ; RV32I-NEXT: ret ; -; RV64I-LABEL: fold_promote: +; RV64I-LABEL: fold_promote_d_s: ; RV64I: # %bb.0: ; RV64I-NEXT: addi a2, zero, -1 ; RV64I-NEXT: slli a2, a2, 63 @@ -39,7 +49,7 @@ ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; -; RV32IF-LABEL: fold_promote: +; RV32IF-LABEL: fold_promote_d_s: ; RV32IF: # %bb.0: ; RV32IF-NEXT: fmv.x.w a2, fa0 ; RV32IF-NEXT: lui a3, 524288 @@ -49,24 +59,183 @@ ; RV32IF-NEXT: or a1, a1, a2 ; RV32IF-NEXT: ret ; -; RV32IFD-LABEL: fold_promote: +; RV32IFD-LABEL: fold_promote_d_s: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: fcvt.d.s ft0, fa1 ; RV32IFD-NEXT: fsgnj.d fa0, fa0, ft0 ; RV32IFD-NEXT: ret ; -; RV64IFD-LABEL: fold_promote: +; RV64IFD-LABEL: fold_promote_d_s: ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fcvt.d.s ft0, fa1 ; RV64IFD-NEXT: fsgnj.d fa0, fa0, ft0 ; RV64IFD-NEXT: ret +; +; RV32IFZFH-LABEL: fold_promote_d_s: +; RV32IFZFH: # %bb.0: +; RV32IFZFH-NEXT: fmv.x.w a2, fa0 +; RV32IFZFH-NEXT: lui a3, 524288 +; RV32IFZFH-NEXT: and a2, a2, a3 +; RV32IFZFH-NEXT: addi a3, a3, -1 +; RV32IFZFH-NEXT: and a1, a1, a3 +; RV32IFZFH-NEXT: or a1, a1, a2 +; RV32IFZFH-NEXT: ret +; +; RV32IFDZFH-LABEL: fold_promote_d_s: +; RV32IFDZFH: # %bb.0: +; RV32IFDZFH-NEXT: fcvt.d.s ft0, fa1 +; RV32IFDZFH-NEXT: fsgnj.d fa0, fa0, ft0 +; RV32IFDZFH-NEXT: ret +; +; RV64IFDZFH-LABEL: fold_promote_d_s: +; RV64IFDZFH: # %bb.0: +; RV64IFDZFH-NEXT: fcvt.d.s ft0, fa1 +; RV64IFDZFH-NEXT: fsgnj.d fa0, fa0, ft0 +; RV64IFDZFH-NEXT: ret %c = fpext float %b to double %t = call double @llvm.copysign.f64(double %a, double %c) ret double %t } -define float @fold_demote(float %a, double %b) nounwind { -; RV32I-LABEL: fold_demote: +define double @fold_promote_d_h(double %a, half %b) nounwind { +; RV32I-LABEL: fold_promote_d_h: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a3, 524288 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: lui a3, 8 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: slli a2, a2, 16 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fold_promote_d_h: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a2, zero, -1 +; RV64I-NEXT: slli a2, a2, 63 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: lui a2, 8 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: fold_promote_d_h: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.x.w a2, fa0 +; RV32IF-NEXT: lui a3, 524288 +; RV32IF-NEXT: and a2, a2, a3 +; RV32IF-NEXT: addi a3, a3, -1 +; RV32IF-NEXT: and a1, a1, a3 +; RV32IF-NEXT: or a1, a1, a2 +; RV32IF-NEXT: ret +; +; RV32IFD-LABEL: fold_promote_d_h: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: fcvt.d.s ft0, fa1 +; RV32IFD-NEXT: fsgnj.d fa0, fa0, ft0 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fold_promote_d_h: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.s ft0, fa1 +; RV64IFD-NEXT: fsgnj.d fa0, fa0, ft0 +; RV64IFD-NEXT: ret +; +; RV32IFZFH-LABEL: fold_promote_d_h: +; RV32IFZFH: # %bb.0: +; RV32IFZFH-NEXT: fmv.x.h a2, fa0 +; RV32IFZFH-NEXT: lui a3, 524288 +; RV32IFZFH-NEXT: addi a3, a3, -1 +; RV32IFZFH-NEXT: and a1, a1, a3 +; RV32IFZFH-NEXT: lui a3, 8 +; 
RV32IFZFH-NEXT: and a2, a2, a3 +; RV32IFZFH-NEXT: slli a2, a2, 16 +; RV32IFZFH-NEXT: or a1, a1, a2 +; RV32IFZFH-NEXT: ret +; +; RV32IFDZFH-LABEL: fold_promote_d_h: +; RV32IFDZFH: # %bb.0: +; RV32IFDZFH-NEXT: fcvt.d.h ft0, fa1 +; RV32IFDZFH-NEXT: fsgnj.d fa0, fa0, ft0 +; RV32IFDZFH-NEXT: ret +; +; RV64IFDZFH-LABEL: fold_promote_d_h: +; RV64IFDZFH: # %bb.0: +; RV64IFDZFH-NEXT: fcvt.d.h ft0, fa1 +; RV64IFDZFH-NEXT: fsgnj.d fa0, fa0, ft0 +; RV64IFDZFH-NEXT: ret + %c = fpext half %b to double + %t = call double @llvm.copysign.f64(double %a, double %c) + ret double %t +} + +define float @fold_promote_f_h(float %a, half %b) nounwind { +; RV32I-LABEL: fold_promote_f_h: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: lui a2, 8 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fold_promote_f_h: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: slli a2, a2, 33 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: slli a2, a2, 15 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: fold_promote_f_h: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fsgnj.s fa0, fa0, fa1 +; RV32IF-NEXT: ret +; +; RV32IFD-LABEL: fold_promote_f_h: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: fsgnj.s fa0, fa0, fa1 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fold_promote_f_h: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fsgnj.s fa0, fa0, fa1 +; RV64IFD-NEXT: ret +; +; RV32IFZFH-LABEL: fold_promote_f_h: +; RV32IFZFH: # %bb.0: +; RV32IFZFH-NEXT: fcvt.s.h ft0, fa1 +; RV32IFZFH-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFZFH-NEXT: ret +; +; RV32IFDZFH-LABEL: fold_promote_f_h: +; RV32IFDZFH: # %bb.0: +; RV32IFDZFH-NEXT: fcvt.s.h ft0, fa1 +; RV32IFDZFH-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFDZFH-NEXT: ret +; +; RV64IFDZFH-LABEL: fold_promote_f_h: +; RV64IFDZFH: # %bb.0: +; RV64IFDZFH-NEXT: fcvt.s.h ft0, fa1 +; RV64IFDZFH-NEXT: fsgnj.s fa0, fa0, ft0 +; RV64IFDZFH-NEXT: ret + %c = fpext half %b to float + %t = call float @llvm.copysign.f32(float %a, float %c) + ret float %t +} + +define float @fold_demote_s_d(float %a, double %b) nounwind { +; RV32I-LABEL: fold_demote_s_d: ; RV32I: # %bb.0: ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: and a2, a2, a1 @@ -75,7 +244,7 @@ ; RV32I-NEXT: or a0, a0, a2 ; RV32I-NEXT: ret ; -; RV64I-LABEL: fold_demote: +; RV64I-LABEL: fold_demote_s_d: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a2, 524288 ; RV64I-NEXT: addiw a2, a2, -1 @@ -87,24 +256,263 @@ ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; -; RV32IF-LABEL: fold_demote: +; RV32IF-LABEL: fold_demote_s_d: ; RV32IF: # %bb.0: ; RV32IF-NEXT: fmv.w.x ft0, a1 ; RV32IF-NEXT: fsgnj.s fa0, fa0, ft0 ; RV32IF-NEXT: ret ; -; RV32IFD-LABEL: fold_demote: +; RV32IFD-LABEL: fold_demote_s_d: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: fcvt.s.d ft0, fa1 ; RV32IFD-NEXT: fsgnj.s fa0, fa0, ft0 ; RV32IFD-NEXT: ret ; -; RV64IFD-LABEL: fold_demote: +; RV64IFD-LABEL: fold_demote_s_d: ; RV64IFD: # %bb.0: ; RV64IFD-NEXT: fcvt.s.d ft0, fa1 ; RV64IFD-NEXT: fsgnj.s fa0, fa0, ft0 ; RV64IFD-NEXT: ret +; +; RV32IFZFH-LABEL: fold_demote_s_d: +; RV32IFZFH: # %bb.0: +; RV32IFZFH-NEXT: fmv.w.x ft0, a1 +; RV32IFZFH-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFZFH-NEXT: ret +; +; RV32IFDZFH-LABEL: fold_demote_s_d: +; RV32IFDZFH: # %bb.0: +; RV32IFDZFH-NEXT: fcvt.s.d ft0, fa1 +; RV32IFDZFH-NEXT: fsgnj.s fa0, fa0, ft0 +; 
RV32IFDZFH-NEXT: ret +; +; RV64IFDZFH-LABEL: fold_demote_s_d: +; RV64IFDZFH: # %bb.0: +; RV64IFDZFH-NEXT: fcvt.s.d ft0, fa1 +; RV64IFDZFH-NEXT: fsgnj.s fa0, fa0, ft0 +; RV64IFDZFH-NEXT: ret %c = fptrunc double %b to float %t = call float @llvm.copysign.f32(float %a, float %c) ret float %t } + +define half @fold_demote_h_s(half %a, float %b) nounwind { +; RV32I-LABEL: fold_demote_h_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: call __gnu_h2f_ieee +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: and a2, s0, a1 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: call __gnu_f2h_ieee +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fold_demote_h_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: sd s0, 0(sp) +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: call __gnu_h2f_ieee +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: and a2, s0, a1 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: call __gnu_f2h_ieee +; RV64I-NEXT: ld s0, 0(sp) +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: fold_demote_h_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) +; RV32IF-NEXT: fsw fs0, 8(sp) +; RV32IF-NEXT: fmv.s fs0, fa1 +; RV32IF-NEXT: call __gnu_f2h_ieee +; RV32IF-NEXT: call __gnu_h2f_ieee +; RV32IF-NEXT: fsgnj.s fa0, fa0, fs0 +; RV32IF-NEXT: flw fs0, 8(sp) +; RV32IF-NEXT: lw ra, 12(sp) +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV32IFD-LABEL: fold_demote_h_s: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) +; RV32IFD-NEXT: fsd fs0, 0(sp) +; RV32IFD-NEXT: fmv.s fs0, fa1 +; RV32IFD-NEXT: call __gnu_f2h_ieee +; RV32IFD-NEXT: call __gnu_h2f_ieee +; RV32IFD-NEXT: fsgnj.s fa0, fa0, fs0 +; RV32IFD-NEXT: fld fs0, 0(sp) +; RV32IFD-NEXT: lw ra, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fold_demote_h_s: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) +; RV64IFD-NEXT: fsd fs0, 0(sp) +; RV64IFD-NEXT: fmv.s fs0, fa1 +; RV64IFD-NEXT: call __gnu_f2h_ieee +; RV64IFD-NEXT: call __gnu_h2f_ieee +; RV64IFD-NEXT: fsgnj.s fa0, fa0, fs0 +; RV64IFD-NEXT: fld fs0, 0(sp) +; RV64IFD-NEXT: ld ra, 8(sp) +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32IFZFH-LABEL: fold_demote_h_s: +; RV32IFZFH: # %bb.0: +; RV32IFZFH-NEXT: fcvt.h.s ft0, fa1 +; RV32IFZFH-NEXT: fsgnj.h fa0, fa0, ft0 +; RV32IFZFH-NEXT: ret +; +; RV32IFDZFH-LABEL: fold_demote_h_s: +; RV32IFDZFH: # %bb.0: +; RV32IFDZFH-NEXT: fcvt.h.s ft0, fa1 +; RV32IFDZFH-NEXT: fsgnj.h fa0, fa0, ft0 +; RV32IFDZFH-NEXT: ret +; +; RV64IFDZFH-LABEL: fold_demote_h_s: +; RV64IFDZFH: # %bb.0: +; RV64IFDZFH-NEXT: fcvt.h.s ft0, fa1 +; RV64IFDZFH-NEXT: fsgnj.h fa0, fa0, ft0 +; RV64IFDZFH-NEXT: ret + %c = fptrunc float %b to half + %t = call half @llvm.copysign.f16(half %a, half %c) + ret half %t +} + +define half @fold_demote_h_d(half %a, double %b) nounwind { +; RV32I-LABEL: fold_demote_h_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw 
s0, 8(sp) +; RV32I-NEXT: mv s0, a2 +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: call __gnu_h2f_ieee +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: and a2, s0, a1 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: call __gnu_f2h_ieee +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fold_demote_h_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: sd s0, 0(sp) +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: call __gnu_h2f_ieee +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: addi a1, zero, -1 +; RV64I-NEXT: slli a1, a1, 63 +; RV64I-NEXT: and a1, s0, a1 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: call __gnu_f2h_ieee +; RV64I-NEXT: ld s0, 0(sp) +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: fold_demote_h_d: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) +; RV32IF-NEXT: sw s0, 8(sp) +; RV32IF-NEXT: mv s0, a1 +; RV32IF-NEXT: call __gnu_f2h_ieee +; RV32IF-NEXT: call __gnu_h2f_ieee +; RV32IF-NEXT: fmv.w.x ft0, s0 +; RV32IF-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IF-NEXT: lw s0, 8(sp) +; RV32IF-NEXT: lw ra, 12(sp) +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV32IFD-LABEL: fold_demote_h_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) +; RV32IFD-NEXT: fsd fs0, 0(sp) +; RV32IFD-NEXT: fmv.d fs0, fa1 +; RV32IFD-NEXT: call __gnu_f2h_ieee +; RV32IFD-NEXT: call __gnu_h2f_ieee +; RV32IFD-NEXT: fcvt.s.d ft0, fs0 +; RV32IFD-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFD-NEXT: fld fs0, 0(sp) +; RV32IFD-NEXT: lw ra, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fold_demote_h_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addi sp, sp, -16 +; RV64IFD-NEXT: sd ra, 8(sp) +; RV64IFD-NEXT: fsd fs0, 0(sp) +; RV64IFD-NEXT: fmv.d fs0, fa1 +; RV64IFD-NEXT: call __gnu_f2h_ieee +; RV64IFD-NEXT: call __gnu_h2f_ieee +; RV64IFD-NEXT: fcvt.s.d ft0, fs0 +; RV64IFD-NEXT: fsgnj.s fa0, fa0, ft0 +; RV64IFD-NEXT: fld fs0, 0(sp) +; RV64IFD-NEXT: ld ra, 8(sp) +; RV64IFD-NEXT: addi sp, sp, 16 +; RV64IFD-NEXT: ret +; +; RV32IFZFH-LABEL: fold_demote_h_d: +; RV32IFZFH: # %bb.0: +; RV32IFZFH-NEXT: srli a0, a1, 16 +; RV32IFZFH-NEXT: fmv.h.x ft0, a0 +; RV32IFZFH-NEXT: fsgnj.h fa0, fa0, ft0 +; RV32IFZFH-NEXT: ret +; +; RV32IFDZFH-LABEL: fold_demote_h_d: +; RV32IFDZFH: # %bb.0: +; RV32IFDZFH-NEXT: fcvt.h.d ft0, fa1 +; RV32IFDZFH-NEXT: fsgnj.h fa0, fa0, ft0 +; RV32IFDZFH-NEXT: ret +; +; RV64IFDZFH-LABEL: fold_demote_h_d: +; RV64IFDZFH: # %bb.0: +; RV64IFDZFH-NEXT: fcvt.h.d ft0, fa1 +; RV64IFDZFH-NEXT: fsgnj.h fa0, fa0, ft0 +; RV64IFDZFH-NEXT: ret + %c = fptrunc double %b to half + %t = call half @llvm.copysign.f16(half %a, half %c) + ret half %t +} diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -0,0 +1,323 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s \ +; RUN: | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 
-mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s \ +; RUN: | FileCheck -check-prefix=RV64IZFH %s + +; These tests are each targeted at a particular RISC-V FPU instruction. Most +; other files in this folder exercise LLVM IR instructions that don't directly +; match a RISC-V instruction. + +define half @fadd_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fadd_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fadd.h fa0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fadd_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fadd.h fa0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fadd half %a, %b + ret half %1 +} + +define half @fsub_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fsub_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fsub.h fa0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fsub_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fsub.h fa0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fsub half %a, %b + ret half %1 +} + +define half @fmul_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fmul_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmul.h fa0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fmul_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmul.h fa0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fmul half %a, %b + ret half %1 +} + +define half @fdiv_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fdiv_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fdiv.h fa0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fdiv_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fdiv.h fa0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fdiv half %a, %b + ret half %1 +} + +declare half @llvm.sqrt.f16(half) + +define half @fsqrt_s(half %a) nounwind { +; RV32IZFH-LABEL: fsqrt_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fsqrt.h fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fsqrt_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fsqrt.h fa0, fa0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.sqrt.f16(half %a) + ret half %1 +} + +declare half @llvm.copysign.f16(half, half) + +define half @fsgnj_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fsgnj_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fsgnj.h fa0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fsgnj_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fsgnj.h fa0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.copysign.f16(half %a, half %b) + ret half %1 +} + +; This function performs extra work to ensure that +; DAGCombiner::visitBITCAST doesn't replace the fneg with an xor. +define i32 @fneg_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fneg_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fadd.h ft0, fa0, fa0 +; RV32IZFH-NEXT: fneg.h ft1, ft0 +; RV32IZFH-NEXT: feq.h a0, ft0, ft1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fneg_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fadd.h ft0, fa0, fa0 +; RV64IZFH-NEXT: fneg.h ft1, ft0 +; RV64IZFH-NEXT: feq.h a0, ft0, ft1 +; RV64IZFH-NEXT: ret + %1 = fadd half %a, %a + %2 = fneg half %1 + %3 = fcmp oeq half %1, %2 + %4 = zext i1 %3 to i32 + ret i32 %4 +} + +; This function performs extra work to ensure that +; DAGCombiner::visitBITCAST doesn't replace the fneg with an xor. 
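To make the guard concrete: for configurations that carry half values in general-purpose registers, the combiner can fold a bare fneg of an argument into an integer xor of the sign bit, and the check lines would never see an FPU negation. A minimal hand-written sketch of the guarded pattern (illustration only, not part of the patch; the function name is invented):

define half @fneg_guarded(half %a) {
; The fadd forces %t to be produced by a real FP operation, so the fneg
; below consumes an FP-register value and selects to an FPU instruction.
  %t = fadd half %a, %a
  %r = fneg half %t
  ret half %r
}

The fsgnjn_s test below uses the same trick before feeding the negated sum to llvm.copysign.f16.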
+define half @fsgnjn_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fsgnjn_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fadd.h ft0, fa0, fa1 +; RV32IZFH-NEXT: fsgnjn.h fa0, fa0, ft0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fsgnjn_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fadd.h ft0, fa0, fa1 +; RV64IZFH-NEXT: fsgnjn.h fa0, fa0, ft0 +; RV64IZFH-NEXT: ret + %1 = fadd half %a, %b + %2 = fneg half %1 + %3 = call half @llvm.copysign.f16(half %a, half %2) + ret half %3 +} + +declare half @llvm.fabs.f16(half) + +; This function performs extra work to ensure that +; DAGCombiner::visitBITCAST doesn't replace the fabs with an and. +define half @fabs_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fabs_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fadd.h ft0, fa0, fa1 +; RV32IZFH-NEXT: fabs.h ft1, ft0 +; RV32IZFH-NEXT: fadd.h fa0, ft1, ft0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fabs_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fadd.h ft0, fa0, fa1 +; RV64IZFH-NEXT: fabs.h ft1, ft0 +; RV64IZFH-NEXT: fadd.h fa0, ft1, ft0 +; RV64IZFH-NEXT: ret + %1 = fadd half %a, %b + %2 = call half @llvm.fabs.f16(half %1) + %3 = fadd half %2, %1 + ret half %3 +} + +declare half @llvm.minnum.f16(half, half) + +define half @fmin_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fmin_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmin.h fa0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fmin_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmin.h fa0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.minnum.f16(half %a, half %b) + ret half %1 +} + +declare half @llvm.maxnum.f16(half, half) + +define half @fmax_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fmax_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmax.h fa0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fmax_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmax.h fa0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.maxnum.f16(half %a, half %b) + ret half %1 +} + +define i32 @feq_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: feq_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: feq_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fcmp oeq half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @flt_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: flt_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: flt_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fcmp olt half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fle_s(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fle_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fle.h a0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fle_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fle.h a0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fcmp ole half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +declare half @llvm.fma.f16(half, half, half) + +define half @fmadd_s(half %a, half %b, half %c) nounwind { +; RV32IZFH-LABEL: fmadd_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmadd.h fa0, fa0, fa1, fa2 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fmadd_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmadd.h fa0, fa0, fa1, fa2 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.fma.f16(half %a, half %b, half %c) + ret half %1 +} + +define half @fmsub_s(half %a, half %b, half %c) nounwind { +; RV32IZFH-LABEL: fmsub_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmv.h.x ft0, zero +; RV32IZFH-NEXT: fadd.h ft0, fa2, ft0 
+; RV32IZFH-NEXT: fmsub.h fa0, fa0, fa1, ft0
+; RV32IZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: fmsub_s:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: fmv.h.x ft0, zero
+; RV64IZFH-NEXT: fadd.h ft0, fa2, ft0
+; RV64IZFH-NEXT: fmsub.h fa0, fa0, fa1, ft0
+; RV64IZFH-NEXT: ret
+ %c_ = fadd half 0.0, %c ; avoid negation using xor
+ %negc = fsub half -0.0, %c_
+ %1 = call half @llvm.fma.f16(half %a, half %b, half %negc)
+ ret half %1
+}
+
+define half @fnmadd_s(half %a, half %b, half %c) nounwind {
+; RV32IZFH-LABEL: fnmadd_s:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: fmv.h.x ft0, zero
+; RV32IZFH-NEXT: fadd.h ft1, fa0, ft0
+; RV32IZFH-NEXT: fadd.h ft0, fa2, ft0
+; RV32IZFH-NEXT: fnmadd.h fa0, ft1, fa1, ft0
+; RV32IZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: fnmadd_s:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: fmv.h.x ft0, zero
+; RV64IZFH-NEXT: fadd.h ft1, fa0, ft0
+; RV64IZFH-NEXT: fadd.h ft0, fa2, ft0
+; RV64IZFH-NEXT: fnmadd.h fa0, ft1, fa1, ft0
+; RV64IZFH-NEXT: ret
+ %a_ = fadd half 0.0, %a
+ %c_ = fadd half 0.0, %c
+ %nega = fsub half -0.0, %a_
+ %negc = fsub half -0.0, %c_
+ %1 = call half @llvm.fma.f16(half %nega, half %b, half %negc)
+ ret half %1
+}
+
+define half @fnmsub_s(half %a, half %b, half %c) nounwind {
+; RV32IZFH-LABEL: fnmsub_s:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: fmv.h.x ft0, zero
+; RV32IZFH-NEXT: fadd.h ft0, fa0, ft0
+; RV32IZFH-NEXT: fnmsub.h fa0, ft0, fa1, fa2
+; RV32IZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: fnmsub_s:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: fmv.h.x ft0, zero
+; RV64IZFH-NEXT: fadd.h ft0, fa0, ft0
+; RV64IZFH-NEXT: fnmsub.h fa0, ft0, fa1, fa2
+; RV64IZFH-NEXT: ret
+ %a_ = fadd half 0.0, %a
+ %nega = fsub half -0.0, %a_
+ %1 = call half @llvm.fma.f16(half %nega, half %b, half %c)
+ ret half %1
+}
diff --git a/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/half-bitmanip-dagcombines.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \
+; RUN: -target-abi ilp32f < %s \
+; RUN: | FileCheck -check-prefix=RV32IZFH %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \
+; RUN: -target-abi lp64f < %s \
+; RUN: | FileCheck -check-prefix=RV64IZFH %s
+
+; This file tests cases where simple floating point operations can be
+; profitably handled through bit manipulation if a soft-float ABI is being used
+; (e.g. fneg implemented by XORing the sign bit). This is typically handled in
+; DAGCombiner::visitBITCAST, but this target-independent code may not trigger
+; in cases where we perform custom legalisation (e.g. RV64F).
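The rewrite described in the first paragraph above can be spelled out directly in IR. A hand-written sketch of the bit-level equivalents (illustration only, not part of the patch; assumes IEEE 754 binary16, where bit 15 is the sign):

define half @fneg_f16_bits(half %a) {
; fneg is an xor of the sign bit on the raw encoding.
  %b = bitcast half %a to i16
  %n = xor i16 %b, -32768 ; 0x8000
  %r = bitcast i16 %n to half
  ret half %r
}

define half @fabs_f16_bits(half %a) {
; fabs clears the same bit instead.
  %b = bitcast half %a to i16
  %n = and i16 %b, 32767 ; 0x7fff
  %r = bitcast i16 %n to half
  ret half %r
}

The RV32I/RV64I check lines below land on exactly this shape: lui a1, 1048568 followed by xor flips bit 15 (the promoted upper bits are don't-care for a half), and lui a1, 8 plus addi(w) a1, a1, -1 builds the 0x7fff mask for fabs.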
+ +define half @fneg(half %a) nounwind { +; RV32I-LABEL: fneg: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 1048568 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IZFH-LABEL: fneg: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fneg.h fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64I-LABEL: fneg: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 1048568 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IZFH-LABEL: fneg: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fneg.h fa0, fa0 +; RV64IZFH-NEXT: ret + %1 = fneg half %a + ret half %1 +} + +declare half @llvm.fabs.f16(half) + +define half @fabs(half %a) nounwind { +; RV32I-LABEL: fabs: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 8 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32IZFH-LABEL: fabs: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fabs.h fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64I-LABEL: fabs: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 8 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64IZFH-LABEL: fabs: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fabs.h fa0, fa0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.fabs.f16(half %a) + ret half %1 +} + +declare half @llvm.copysign.f16(half, half) + +; DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN will convert to bitwise +; operations if half precision floating point isn't supported. A combine could +; be written to do the same even when f16 is legal. + +define half @fcopysign_fneg(half %a, half %b) nounwind { +; RV32I-LABEL: fcopysign_fneg: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: call __gnu_h2f_ieee +; RV32I-NEXT: not a1, s0 +; RV32I-NEXT: lui a2, 524288 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: lui a2, 8 +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: call __gnu_f2h_ieee +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IZFH-LABEL: fcopysign_fneg: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fsgnjn.h fa0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64I-LABEL: fcopysign_fneg: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: sd s0, 0(sp) +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: call __gnu_h2f_ieee +; RV64I-NEXT: not a1, s0 +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: slli a2, a2, 33 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: slli a2, a2, 15 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: call __gnu_f2h_ieee +; RV64I-NEXT: ld s0, 0(sp) +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IZFH-LABEL: fcopysign_fneg: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fsgnjn.h fa0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fneg half %b + %2 = call half @llvm.copysign.f16(half %a, half %1) + ret half %2 +} diff --git a/llvm/test/CodeGen/RISCV/half-br-fcmp.ll b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-br-fcmp.ll @@ -0,0 +1,651 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 
-mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFH %s + +declare void @abort() +declare void @exit(i32) +declare half @dummy(half) + +define void @br_fcmp_false(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_false: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: addi a0, zero, 1 +; RV32IZFH-NEXT: bnez a0, .LBB0_2 +; RV32IZFH-NEXT: # %bb.1: # %if.then +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB0_2: # %if.else +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_false: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: addi a0, zero, 1 +; RV64IZFH-NEXT: bnez a0, .LBB0_2 +; RV64IZFH-NEXT: # %bb.1: # %if.then +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB0_2: # %if.else +; RV64IZFH-NEXT: call abort + %1 = fcmp false half %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + ret void +if.else: + tail call void @abort() + unreachable +} + +define void @br_fcmp_oeq(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_oeq: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: feq.h a0, fa0, fa1 +; RV32IZFH-NEXT: bnez a0, .LBB1_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB1_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_oeq: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: feq.h a0, fa0, fa1 +; RV64IZFH-NEXT: bnez a0, .LBB1_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB1_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp oeq half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +; TODO: generated code quality for this is very poor due to +; DAGCombiner::visitXOR converting the legal setoeq to setune, which requires +; expansion. 
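Roughly: because the abort call sits in the true successor here, the branch must be taken on the inverted condition, and the combiner folds that inversion into the condition itself rather than swapping successors. An IR-level analogue of the shape it ends up with (hand-written and simplified; the real transform happens on the SelectionDAG):

define void @br_fcmp_oeq_inverted(half %a, half %b) {
; The legal setoeq becomes setune, which has no single RISC-V
; instruction and is expanded to feq.h plus xori, as the check lines
; below show.
  %cmp = fcmp oeq half %a, %b
  %inv = xor i1 %cmp, true
  br i1 %inv, label %if.else, label %if.then
if.then:
  ret void
if.else:
  ret void
}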
+define void @br_fcmp_oeq_alt(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_oeq_alt: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: feq.h a0, fa0, fa1 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: beqz a0, .LBB2_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB2_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_oeq_alt: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: feq.h a0, fa0, fa1 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: beqz a0, .LBB2_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB2_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp oeq half %a, %b + br i1 %1, label %if.then, label %if.else +if.then: + tail call void @abort() + unreachable +if.else: + ret void +} + +define void @br_fcmp_ogt(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_ogt: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: flt.h a0, fa1, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB3_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB3_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_ogt: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: flt.h a0, fa1, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB3_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB3_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp ogt half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_oge(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_oge: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: fle.h a0, fa1, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB4_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB4_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_oge: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: fle.h a0, fa1, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB4_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB4_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp oge half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_olt(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_olt: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: bnez a0, .LBB5_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB5_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_olt: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: 
sd ra, 8(sp) +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: bnez a0, .LBB5_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB5_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp olt half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ole(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_ole: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: fle.h a0, fa0, fa1 +; RV32IZFH-NEXT: bnez a0, .LBB6_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB6_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_ole: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: fle.h a0, fa0, fa1 +; RV64IZFH-NEXT: bnez a0, .LBB6_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB6_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp ole half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +; TODO: feq.h+sltiu+bne -> feq.h+beq +define void @br_fcmp_one(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_one: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: feq.h a0, fa1, fa1 +; RV32IZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: feq.h a1, fa0, fa1 +; RV32IZFH-NEXT: not a1, a1 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: bnez a0, .LBB7_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB7_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_one: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: feq.h a0, fa1, fa1 +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: feq.h a1, fa0, fa1 +; RV64IZFH-NEXT: not a1, a1 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: bnez a0, .LBB7_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB7_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp one half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ord(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_ord: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: feq.h a0, fa1, fa1 +; RV32IZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: bnez a0, .LBB8_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB8_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_ord: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: feq.h a0, fa1, fa1 +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: bnez a0, .LBB8_2 +; RV64IZFH-NEXT: # %bb.1: # 
%if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB8_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp ord half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ueq(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_ueq: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: feq.h a0, fa0, fa1 +; RV32IZFH-NEXT: feq.h a1, fa1, fa1 +; RV32IZFH-NEXT: feq.h a2, fa0, fa0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: seqz a1, a1 +; RV32IZFH-NEXT: or a0, a0, a1 +; RV32IZFH-NEXT: bnez a0, .LBB9_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB9_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_ueq: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: feq.h a0, fa0, fa1 +; RV64IZFH-NEXT: feq.h a1, fa1, fa1 +; RV64IZFH-NEXT: feq.h a2, fa0, fa0 +; RV64IZFH-NEXT: and a1, a2, a1 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: or a0, a0, a1 +; RV64IZFH-NEXT: bnez a0, .LBB9_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB9_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp ueq half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ugt(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_ugt: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: fle.h a0, fa0, fa1 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: bnez a0, .LBB10_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB10_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_ugt: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: fle.h a0, fa0, fa1 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: bnez a0, .LBB10_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB10_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp ugt half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_uge(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_uge: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: bnez a0, .LBB11_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB11_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_uge: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: bnez a0, .LBB11_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB11_2: # %if.then +; RV64IZFH-NEXT: 
call abort + %1 = fcmp uge half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ult(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_ult: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: fle.h a0, fa1, fa0 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: bnez a0, .LBB12_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB12_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_ult: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: fle.h a0, fa1, fa0 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: bnez a0, .LBB12_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB12_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp ult half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_ule(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_ule: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: flt.h a0, fa1, fa0 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: bnez a0, .LBB13_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB13_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_ule: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: flt.h a0, fa1, fa0 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: bnez a0, .LBB13_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB13_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp ule half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_une(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_une: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: feq.h a0, fa0, fa1 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: bnez a0, .LBB14_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB14_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_une: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: feq.h a0, fa0, fa1 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: bnez a0, .LBB14_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB14_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp une half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_uno(half %a, half %b) nounwind { +; TODO: sltiu+bne -> beq +; RV32IZFH-LABEL: br_fcmp_uno: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: feq.h a0, fa1, fa1 +; RV32IZFH-NEXT: 
feq.h a1, fa0, fa0 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: seqz a0, a0 +; RV32IZFH-NEXT: bnez a0, .LBB15_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB15_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_uno: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: feq.h a0, fa1, fa1 +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: seqz a0, a0 +; RV64IZFH-NEXT: bnez a0, .LBB15_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB15_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp uno half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} + +define void @br_fcmp_true(half %a, half %b) nounwind { +; RV32IZFH-LABEL: br_fcmp_true: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: addi a0, zero, 1 +; RV32IZFH-NEXT: bnez a0, .LBB16_2 +; RV32IZFH-NEXT: # %bb.1: # %if.else +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; RV32IZFH-NEXT: .LBB16_2: # %if.then +; RV32IZFH-NEXT: call abort +; +; RV64IZFH-LABEL: br_fcmp_true: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: addi a0, zero, 1 +; RV64IZFH-NEXT: bnez a0, .LBB16_2 +; RV64IZFH-NEXT: # %bb.1: # %if.else +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; RV64IZFH-NEXT: .LBB16_2: # %if.then +; RV64IZFH-NEXT: call abort + %1 = fcmp true half %a, %b + br i1 %1, label %if.then, label %if.else +if.else: + ret void +if.then: + tail call void @abort() + unreachable +} diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -0,0 +1,511 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32d < %s | FileCheck -check-prefix=RV32IDZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64d < %s | FileCheck -check-prefix=RV64IDZFH %s + +define i16 @fcvt_si_h(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_si_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_si_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_si_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_si_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = fptosi half %a to i16 + ret i16 %1 +} + +define i16 @fcvt_ui_h(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_ui_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_ui_h: 
+; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_ui_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_ui_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = fptoui half %a to i16 + ret i16 %1 +} + +define i32 @fcvt_w_h(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_w_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_w_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_w_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_w_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = fptosi half %a to i32 + ret i32 %1 +} + +define i32 @fcvt_wu_h(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_wu_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_wu_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_wu_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_wu_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = fptoui half %a to i32 + ret i32 %1 +} + +define i64 @fcvt_l_h(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_l_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: call __fixhfdi +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_l_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) +; RV32IDZFH-NEXT: call __fixhfdi +; RV32IDZFH-NEXT: lw ra, 12(sp) +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_l_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_l_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = fptosi half %a to i64 + ret i64 %1 +} + +define i64 @fcvt_lu_h(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_lu_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: call __fixunshfdi +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_lu_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) +; RV32IDZFH-NEXT: call __fixunshfdi +; RV32IDZFH-NEXT: lw ra, 12(sp) +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_lu_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_lu_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = fptoui half %a to i64 + ret i64 %1 +} + +define half @fcvt_h_si(i16 %a) nounwind { +; RV32IZFH-LABEL: fcvt_h_si: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: slli a0, a0, 16 +; RV32IZFH-NEXT: srai a0, a0, 16 +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_si: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: slli a0, a0, 16 +; RV32IDZFH-NEXT: srai a0, a0, 16 +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; 
RV64IZFH-LABEL: fcvt_h_si: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: slli a0, a0, 48 +; RV64IZFH-NEXT: srai a0, a0, 48 +; RV64IZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_si: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: slli a0, a0, 48 +; RV64IDZFH-NEXT: srai a0, a0, 48 +; RV64IDZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = sitofp i16 %a to half + ret half %1 +} + +define half @fcvt_h_ui(i16 %a) nounwind { +; RV32IZFH-LABEL: fcvt_h_ui: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lui a1, 16 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: and a0, a0, a1 +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_ui: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lui a1, 16 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: and a0, a0, a1 +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_ui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lui a1, 16 +; RV64IZFH-NEXT: addiw a1, a1, -1 +; RV64IZFH-NEXT: and a0, a0, a1 +; RV64IZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_ui: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lui a1, 16 +; RV64IDZFH-NEXT: addiw a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a0, a1 +; RV64IDZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = uitofp i16 %a to half + ret half %1 +} + +define half @fcvt_h_w(i32 %a) nounwind { +; RV32IZFH-LABEL: fcvt_h_w: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_w: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_w: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_w: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = sitofp i32 %a to half + ret half %1 +} + +define half @fcvt_h_wu(i32 %a) nounwind { +; RV32IZFH-LABEL: fcvt_h_wu: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_wu: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_wu: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_wu: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = uitofp i32 %a to half + ret half %1 +} + +define half @fcvt_h_l(i64 %a) nounwind { +; RV32IZFH-LABEL: fcvt_h_l: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: call __floatdihf +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_l: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) +; RV32IDZFH-NEXT: call __floatdihf +; RV32IDZFH-NEXT: lw ra, 12(sp) +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_l: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_l: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = sitofp i64 %a to half + ret half %1 +} + +define half @fcvt_h_lu(i64 %a) nounwind { +; RV32IZFH-LABEL: fcvt_h_lu: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: call __floatundihf +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: 
fcvt_h_lu: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) +; RV32IDZFH-NEXT: call __floatundihf +; RV32IDZFH-NEXT: lw ra, 12(sp) +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_lu: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_lu: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = uitofp i64 %a to half + ret half %1 +} + +define half @fcvt_h_s(float %a) nounwind { +; RV32IZFH-LABEL: fcvt_h_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_s: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_s: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = fptrunc float %a to half + ret half %1 +} + +define float @fcvt_s_h(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_s_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_s_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_s_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_s_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = fpext half %a to float + ret float %1 +} + +define half @fcvt_h_d(double %a) nounwind { +; RV32IZFH-LABEL: fcvt_h_d: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: call __truncdfhf2 +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_d: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.d fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_d: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: call __truncdfhf2 +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_d: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.d fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = fptrunc double %a to half + ret half %1 +} + +define double @fcvt_d_h(half %a) nounwind { +; RV32IZFH-LABEL: fcvt_d_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call __extendsfdf2 +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_d_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.d.h fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_d_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: call __extendsfdf2 +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_d_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.d.h fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = fpext half %a to double + ret double %1 +} + +define half @bitcast_h_i16(i16 %a) nounwind { +; RV32IZFH-LABEL: bitcast_h_i16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmv.h.x fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: bitcast_h_i16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fmv.h.x 
fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: bitcast_h_i16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmv.h.x fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: bitcast_h_i16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fmv.h.x fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = bitcast i16 %a to half + ret half %1 +} + +define i16 @bitcast_i16_h(half %a) nounwind { +; RV32IZFH-LABEL: bitcast_i16_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmv.x.h a0, fa0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: bitcast_i16_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fmv.x.h a0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: bitcast_i16_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmv.x.h a0, fa0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: bitcast_i16_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fmv.x.h a0, fa0 +; RV64IDZFH-NEXT: ret + %1 = bitcast half %a to i16 + ret i16 %1 +} diff --git a/llvm/test/CodeGen/RISCV/half-fcmp.ll b/llvm/test/CodeGen/RISCV/half-fcmp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-fcmp.ll @@ -0,0 +1,285 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFH %s + +define i32 @fcmp_false(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_false: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: mv a0, zero +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_false: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: mv a0, zero +; RV64IZFH-NEXT: ret + %1 = fcmp false half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oeq(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_oeq: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_oeq: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fcmp oeq half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ogt(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_ogt: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: flt.h a0, fa1, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_ogt: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: flt.h a0, fa1, fa0 +; RV64IZFH-NEXT: ret + %1 = fcmp ogt half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oge(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_oge: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fle.h a0, fa1, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_oge: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fle.h a0, fa1, fa0 +; RV64IZFH-NEXT: ret + %1 = fcmp oge half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_olt(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_olt: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_olt: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fcmp olt half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ole(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_ole: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fle.h a0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_ole: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fle.h a0, fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fcmp ole half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_one(half %a, half %b) nounwind { +; 
RV32IZFH-LABEL: fcmp_one: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa1, fa1 +; RV32IZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: feq.h a1, fa0, fa1 +; RV32IZFH-NEXT: not a1, a1 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_one: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa1, fa1 +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: feq.h a1, fa0, fa1 +; RV64IZFH-NEXT: not a1, a1 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: ret + %1 = fcmp one half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ord(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_ord: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa1, fa1 +; RV32IZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_ord: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa1, fa1 +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: ret + %1 = fcmp ord half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ueq(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_ueq: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa1 +; RV32IZFH-NEXT: feq.h a1, fa1, fa1 +; RV32IZFH-NEXT: feq.h a2, fa0, fa0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: seqz a1, a1 +; RV32IZFH-NEXT: or a0, a0, a1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_ueq: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa1 +; RV64IZFH-NEXT: feq.h a1, fa1, fa1 +; RV64IZFH-NEXT: feq.h a2, fa0, fa0 +; RV64IZFH-NEXT: and a1, a2, a1 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: or a0, a0, a1 +; RV64IZFH-NEXT: ret + %1 = fcmp ueq half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ugt(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_ugt: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fle.h a0, fa0, fa1 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_ugt: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fle.h a0, fa0, fa1 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: ret + %1 = fcmp ugt half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uge(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_uge: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_uge: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: ret + %1 = fcmp uge half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ult(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_ult: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fle.h a0, fa1, fa0 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_ult: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fle.h a0, fa1, fa0 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: ret + %1 = fcmp ult half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ule(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_ule: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: flt.h a0, fa1, fa0 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_ule: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: flt.h a0, fa1, fa0 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: ret + %1 = fcmp ule half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_une(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_une: +; RV32IZFH: # %bb.0: +; 
RV32IZFH-NEXT: feq.h a0, fa0, fa1 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_une: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa1 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: ret + %1 = fcmp une half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_uno(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_uno: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa1, fa1 +; RV32IZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: seqz a0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_uno: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa1, fa1 +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: seqz a0, a0 +; RV64IZFH-NEXT: ret + %1 = fcmp uno half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_true(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fcmp_true: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi a0, zero, 1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcmp_true: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi a0, zero, 1 +; RV64IZFH-NEXT: ret + %1 = fcmp true half %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/RISCV/half-imm.ll b/llvm/test/CodeGen/RISCV/half-imm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-imm.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFH %s + +; TODO: constant pool shouldn't be necessary for RV32IZfh and RV64IZfh +define half @half_imm() nounwind { +; RV32IZFH-LABEL: half_imm: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI0_0) +; RV32IZFH-NEXT: flh fa0, %lo(.LCPI0_0)(a0) +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: half_imm: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lui a0, %hi(.LCPI0_0) +; RV64IZFH-NEXT: flh fa0, %lo(.LCPI0_0)(a0) +; RV64IZFH-NEXT: ret + ret half 3.0 +} + +define half @half_imm_op(half %a) nounwind { +; RV32IZFH-LABEL: half_imm_op: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_0) +; RV32IZFH-NEXT: flh ft0, %lo(.LCPI1_0)(a0) +; RV32IZFH-NEXT: fadd.h fa0, fa0, ft0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: half_imm_op: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lui a0, %hi(.LCPI1_0) +; RV64IZFH-NEXT: flh ft0, %lo(.LCPI1_0)(a0) +; RV64IZFH-NEXT: fadd.h fa0, fa0, ft0 +; RV64IZFH-NEXT: ret + %1 = fadd half %a, 1.0 + ret half %1 +} diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -0,0 +1,195 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+d -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32d < %s | FileCheck -check-prefix=RV32IDZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+d -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64d < %s | 
FileCheck -check-prefix=RV64IDZFH %s + +declare half @llvm.sqrt.f16(half) + +define half @sqrt_f16(half %a) nounwind { +; RV32IZFH-LABEL: sqrt_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fsqrt.h fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: sqrt_f16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fsqrt.h fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: sqrt_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fsqrt.h fa0, fa0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: sqrt_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fsqrt.h fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.sqrt.f16(half %a) + ret half %1 +} + +declare half @llvm.fma.f16(half, half, half) + +define half @fma_f16(half %a, half %b, half %c) nounwind { +; RV32IZFH-LABEL: fma_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmadd.h fa0, fa0, fa1, fa2 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fma_f16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fmadd.h fa0, fa0, fa1, fa2 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fma_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmadd.h fa0, fa0, fa1, fa2 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fma_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fmadd.h fa0, fa0, fa1, fa2 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.fma.f16(half %a, half %b, half %c) + ret half %1 +} + +declare half @llvm.fmuladd.f16(half, half, half) + +define half @fmuladd_f16(half %a, half %b, half %c) nounwind { +; RV32IZFH-LABEL: fmuladd_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmul.h ft0, fa0, fa1 +; RV32IZFH-NEXT: fadd.h fa0, ft0, fa2 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fmuladd_f16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fmul.h ft0, fa0, fa1 +; RV32IDZFH-NEXT: fadd.h fa0, ft0, fa2 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fmuladd_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmul.h ft0, fa0, fa1 +; RV64IZFH-NEXT: fadd.h fa0, ft0, fa2 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fmuladd_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fmul.h ft0, fa0, fa1 +; RV64IDZFH-NEXT: fadd.h fa0, ft0, fa2 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.fmuladd.f16(half %a, half %b, half %c) + ret half %1 +} + +declare half @llvm.fabs.f16(half) + +define half @fabs_f16(half %a) nounwind { +; RV32IZFH-LABEL: fabs_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fabs.h fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fabs_f16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fabs.h fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: fabs_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fabs.h fa0, fa0 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fabs_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fabs.h fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.fabs.f16(half %a) + ret half %1 +} + +declare half @llvm.minnum.f16(half, half) + +define half @minnum_f16(half %a, half %b) nounwind { +; RV32IZFH-LABEL: minnum_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmin.h fa0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: minnum_f16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fmin.h fa0, fa0, fa1 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: minnum_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmin.h fa0, fa0, fa1 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: minnum_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fmin.h fa0, fa0, fa1 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.minnum.f16(half %a, half %b) + ret half %1 +} + +declare half @llvm.maxnum.f16(half, half) + +define half @maxnum_f16(half %a, half %b) nounwind { +; RV32IZFH-LABEL: maxnum_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmax.h fa0, fa0, fa1 +; 
RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: maxnum_f16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fmax.h fa0, fa0, fa1 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: maxnum_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmax.h fa0, fa0, fa1 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: maxnum_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fmax.h fa0, fa0, fa1 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.maxnum.f16(half %a, half %b) + ret half %1 +} + +declare half @llvm.copysign.f16(half, half) + +define half @copysign_f16(half %a, half %b) nounwind { +; RV32IZFH-LABEL: copysign_f16: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fsgnj.h fa0, fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: copysign_f16: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fsgnj.h fa0, fa0, fa1 +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: copysign_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fsgnj.h fa0, fa0, fa1 +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: copysign_f16: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fsgnj.h fa0, fa0, fa1 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.copysign.f16(half %a, half %b) + ret half %1 +} diff --git a/llvm/test/CodeGen/RISCV/half-isnan.ll b/llvm/test/CodeGen/RISCV/half-isnan.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-isnan.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFH %s + +define zeroext i1 @half_is_nan(half %a) nounwind { +; RV32IZFH-LABEL: half_is_nan: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: seqz a0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: half_is_nan: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: seqz a0, a0 +; RV64IZFH-NEXT: ret + %1 = fcmp uno half %a, 0.000000e+00 + ret i1 %1 +} + +define zeroext i1 @half_not_nan(half %a) nounwind { +; RV32IZFH-LABEL: half_not_nan: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: half_not_nan: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa0 +; RV64IZFH-NEXT: ret + %1 = fcmp ord half %a, 0.000000e+00 + ret i1 %1 +} diff --git a/llvm/test/CodeGen/RISCV/half-mem.ll b/llvm/test/CodeGen/RISCV/half-mem.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-mem.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFH %s + +define half @flh(half *%a) nounwind { +; RV32IZFH-LABEL: flh: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: flh ft0, 0(a0) +; RV32IZFH-NEXT: flh ft1, 6(a0) +; RV32IZFH-NEXT: fadd.h fa0, ft0, ft1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: flh: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: flh ft0, 0(a0) +; RV64IZFH-NEXT: flh ft1, 6(a0) +; RV64IZFH-NEXT: fadd.h fa0, ft0, ft1 +; RV64IZFH-NEXT: ret + %1 = load half, half* %a + %2 = getelementptr half, half* %a, i32 3 + %3 = load half, half* %2 +; Use both loaded values in an FP op to ensure an flh is used, even for the +; soft half ABI + %4 = fadd 
half %1, %3 + ret half %4 +} + +define void @fsh(half *%a, half %b, half %c) nounwind { +; Use %b and %c in an FP op to ensure half precision floating point registers +; are used, even for the soft half ABI +; RV32IZFH-LABEL: fsh: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fadd.h ft0, fa0, fa1 +; RV32IZFH-NEXT: fsh ft0, 0(a0) +; RV32IZFH-NEXT: fsh ft0, 16(a0) +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fsh: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fadd.h ft0, fa0, fa1 +; RV64IZFH-NEXT: fsh ft0, 0(a0) +; RV64IZFH-NEXT: fsh ft0, 16(a0) +; RV64IZFH-NEXT: ret + %1 = fadd half %b, %c + store half %1, half* %a + %2 = getelementptr half, half* %a, i32 8 + store half %1, half* %2 + ret void +} + +; Check load and store to a global +@G = global half 0.0 + +define half @flh_fsh_global(half %a, half %b) nounwind { +; Use %a and %b in an FP op to ensure half precision floating point registers +; are used, even for the soft half ABI +; RV32IZFH-LABEL: flh_fsh_global: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fadd.h fa0, fa0, fa1 +; RV32IZFH-NEXT: lui a0, %hi(G) +; RV32IZFH-NEXT: flh ft0, %lo(G)(a0) +; RV32IZFH-NEXT: fsh fa0, %lo(G)(a0) +; RV32IZFH-NEXT: addi a0, a0, %lo(G) +; RV32IZFH-NEXT: flh ft0, 18(a0) +; RV32IZFH-NEXT: fsh fa0, 18(a0) +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: flh_fsh_global: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fadd.h fa0, fa0, fa1 +; RV64IZFH-NEXT: lui a0, %hi(G) +; RV64IZFH-NEXT: flh ft0, %lo(G)(a0) +; RV64IZFH-NEXT: fsh fa0, %lo(G)(a0) +; RV64IZFH-NEXT: addi a0, a0, %lo(G) +; RV64IZFH-NEXT: flh ft0, 18(a0) +; RV64IZFH-NEXT: fsh fa0, 18(a0) +; RV64IZFH-NEXT: ret + %1 = fadd half %a, %b + %2 = load volatile half, half* @G + store half %1, half* @G + %3 = getelementptr half, half* @G, i32 9 + %4 = load volatile half, half* %3 + store half %1, half* %3 + ret half %1 +} + +; Ensure that 1 is added to the high 20 bits if bit 11 of the low part is 1 +define half @flh_fsh_constant(half %a) nounwind { +; RV32IZFH-LABEL: flh_fsh_constant: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lui a0, 912092 +; RV32IZFH-NEXT: flh ft0, -273(a0) +; RV32IZFH-NEXT: fadd.h fa0, fa0, ft0 +; RV32IZFH-NEXT: fsh fa0, -273(a0) +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: flh_fsh_constant: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lui a0, 56 +; RV64IZFH-NEXT: addiw a0, a0, -1353 +; RV64IZFH-NEXT: slli a0, a0, 14 +; RV64IZFH-NEXT: flh ft0, -273(a0) +; RV64IZFH-NEXT: fadd.h fa0, fa0, ft0 +; RV64IZFH-NEXT: fsh fa0, -273(a0) +; RV64IZFH-NEXT: ret + %1 = inttoptr i32 3735928559 to half* + %2 = load volatile half, half* %1 + %3 = fadd half %a, %2 + store half %3, half* %1 + ret half %3 +} + +declare void @notdead(i8*) + +define half @flh_stack(half %a) nounwind { +; RV32IZFH-LABEL: flh_stack: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: fsw fs0, 8(sp) +; RV32IZFH-NEXT: fmv.h fs0, fa0 +; RV32IZFH-NEXT: addi a0, sp, 4 +; RV32IZFH-NEXT: call notdead +; RV32IZFH-NEXT: flh ft0, 4(sp) +; RV32IZFH-NEXT: fadd.h fa0, ft0, fs0 +; RV32IZFH-NEXT: flw fs0, 8(sp) +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: flh_stack: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: fsw fs0, 4(sp) +; RV64IZFH-NEXT: fmv.h fs0, fa0 +; RV64IZFH-NEXT: mv a0, sp +; RV64IZFH-NEXT: call notdead +; RV64IZFH-NEXT: flh ft0, 0(sp) +; RV64IZFH-NEXT: fadd.h fa0, ft0, fs0 +; RV64IZFH-NEXT: flw fs0, 4(sp) +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: 
ret + %1 = alloca half, align 4 + %2 = bitcast half* %1 to i8* + call void @notdead(i8* %2) + %3 = load half, half* %1 + %4 = fadd half %3, %a ; force load in to FPR16 + ret half %4 +} + +define void @fsh_stack(half %a, half %b) nounwind { +; RV32IZFH-LABEL: fsh_stack: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) +; RV32IZFH-NEXT: fadd.h ft0, fa0, fa1 +; RV32IZFH-NEXT: fsh ft0, 8(sp) +; RV32IZFH-NEXT: addi a0, sp, 8 +; RV32IZFH-NEXT: call notdead +; RV32IZFH-NEXT: lw ra, 12(sp) +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fsh_stack: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) +; RV64IZFH-NEXT: fadd.h ft0, fa0, fa1 +; RV64IZFH-NEXT: fsh ft0, 4(sp) +; RV64IZFH-NEXT: addi a0, sp, 4 +; RV64IZFH-NEXT: call notdead +; RV64IZFH-NEXT: ld ra, 8(sp) +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret + %1 = fadd half %a, %b ; force store from FPR16 + %2 = alloca half, align 4 + store half %1, half* %2 + %3 = bitcast half* %2 to i8* + call void @notdead(i8* %3) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll @@ -0,0 +1,421 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f < %s | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck -check-prefix=RV64IZFH %s + +define half @select_fcmp_false(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_false: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_false: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: ret + %1 = fcmp false half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_oeq(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_oeq: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa1 +; RV32IZFH-NEXT: bnez a0, .LBB1_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB1_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_oeq: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa1 +; RV64IZFH-NEXT: bnez a0, .LBB1_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB1_2: +; RV64IZFH-NEXT: ret + %1 = fcmp oeq half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_ogt(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_ogt: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: flt.h a0, fa1, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB2_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB2_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_ogt: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: flt.h a0, fa1, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB2_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB2_2: +; RV64IZFH-NEXT: ret + %1 = fcmp ogt half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_oge(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_oge: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fle.h a0, fa1, fa0 +; RV32IZFH-NEXT: bnez a0, .LBB3_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: 
fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB3_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_oge: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fle.h a0, fa1, fa0 +; RV64IZFH-NEXT: bnez a0, .LBB3_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB3_2: +; RV64IZFH-NEXT: ret + %1 = fcmp oge half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_olt(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_olt: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: bnez a0, .LBB4_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB4_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_olt: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: bnez a0, .LBB4_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB4_2: +; RV64IZFH-NEXT: ret + %1 = fcmp olt half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_ole(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_ole: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fle.h a0, fa0, fa1 +; RV32IZFH-NEXT: bnez a0, .LBB5_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB5_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_ole: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fle.h a0, fa0, fa1 +; RV64IZFH-NEXT: bnez a0, .LBB5_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB5_2: +; RV64IZFH-NEXT: ret + %1 = fcmp ole half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_one(half %a, half %b) nounwind { +; TODO: feq.h+sltiu+bne sequence could be optimised +; RV32IZFH-LABEL: select_fcmp_one: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa1, fa1 +; RV32IZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: feq.h a1, fa0, fa1 +; RV32IZFH-NEXT: not a1, a1 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: bnez a0, .LBB6_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB6_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_one: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa1, fa1 +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: feq.h a1, fa0, fa1 +; RV64IZFH-NEXT: not a1, a1 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: bnez a0, .LBB6_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB6_2: +; RV64IZFH-NEXT: ret + %1 = fcmp one half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_ord(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_ord: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa1, fa1 +; RV32IZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: bnez a0, .LBB7_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB7_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_ord: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa1, fa1 +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: bnez a0, .LBB7_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB7_2: +; RV64IZFH-NEXT: ret + %1 = fcmp ord half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_ueq(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_ueq: +; RV32IZFH: # %bb.0: +; 
RV32IZFH-NEXT: feq.h a0, fa0, fa1 +; RV32IZFH-NEXT: feq.h a1, fa1, fa1 +; RV32IZFH-NEXT: feq.h a2, fa0, fa0 +; RV32IZFH-NEXT: and a1, a2, a1 +; RV32IZFH-NEXT: seqz a1, a1 +; RV32IZFH-NEXT: or a0, a0, a1 +; RV32IZFH-NEXT: bnez a0, .LBB8_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB8_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_ueq: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa1 +; RV64IZFH-NEXT: feq.h a1, fa1, fa1 +; RV64IZFH-NEXT: feq.h a2, fa0, fa0 +; RV64IZFH-NEXT: and a1, a2, a1 +; RV64IZFH-NEXT: seqz a1, a1 +; RV64IZFH-NEXT: or a0, a0, a1 +; RV64IZFH-NEXT: bnez a0, .LBB8_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB8_2: +; RV64IZFH-NEXT: ret + %1 = fcmp ueq half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_ugt(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_ugt: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fle.h a0, fa0, fa1 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: bnez a0, .LBB9_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB9_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_ugt: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fle.h a0, fa0, fa1 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: bnez a0, .LBB9_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB9_2: +; RV64IZFH-NEXT: ret + %1 = fcmp ugt half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_uge(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_uge: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: flt.h a0, fa0, fa1 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: bnez a0, .LBB10_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB10_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_uge: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: flt.h a0, fa0, fa1 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: bnez a0, .LBB10_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB10_2: +; RV64IZFH-NEXT: ret + %1 = fcmp uge half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_ult(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_ult: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fle.h a0, fa1, fa0 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: bnez a0, .LBB11_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB11_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_ult: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fle.h a0, fa1, fa0 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: bnez a0, .LBB11_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB11_2: +; RV64IZFH-NEXT: ret + %1 = fcmp ult half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_ule(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_ule: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: flt.h a0, fa1, fa0 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: bnez a0, .LBB12_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB12_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_ule: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: flt.h a0, fa1, fa0 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: bnez a0, .LBB12_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB12_2: +; RV64IZFH-NEXT: ret + %1 = fcmp ule half 
%a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_une(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_une: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa0, fa1 +; RV32IZFH-NEXT: xori a0, a0, 1 +; RV32IZFH-NEXT: bnez a0, .LBB13_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB13_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_une: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa0, fa1 +; RV64IZFH-NEXT: xori a0, a0, 1 +; RV64IZFH-NEXT: bnez a0, .LBB13_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB13_2: +; RV64IZFH-NEXT: ret + %1 = fcmp une half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_uno(half %a, half %b) nounwind { +; TODO: sltiu+bne could be optimized +; RV32IZFH-LABEL: select_fcmp_uno: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a0, fa1, fa1 +; RV32IZFH-NEXT: feq.h a1, fa0, fa0 +; RV32IZFH-NEXT: and a0, a1, a0 +; RV32IZFH-NEXT: seqz a0, a0 +; RV32IZFH-NEXT: bnez a0, .LBB14_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: fmv.h fa0, fa1 +; RV32IZFH-NEXT: .LBB14_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_uno: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a0, fa1, fa1 +; RV64IZFH-NEXT: feq.h a1, fa0, fa0 +; RV64IZFH-NEXT: and a0, a1, a0 +; RV64IZFH-NEXT: seqz a0, a0 +; RV64IZFH-NEXT: bnez a0, .LBB14_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: fmv.h fa0, fa1 +; RV64IZFH-NEXT: .LBB14_2: +; RV64IZFH-NEXT: ret + %1 = fcmp uno half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +define half @select_fcmp_true(half %a, half %b) nounwind { +; RV32IZFH-LABEL: select_fcmp_true: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: select_fcmp_true: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: ret + %1 = fcmp true half %a, %b + %2 = select i1 %1, half %a, half %b + ret half %2 +} + +; Ensure that ISel succeeds for a select+fcmp that has an i32 result type. +define i32 @i32_select_fcmp_oeq(half %a, half %b, i32 %c, i32 %d) nounwind { +; RV32IZFH-LABEL: i32_select_fcmp_oeq: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: feq.h a2, fa0, fa1 +; RV32IZFH-NEXT: bnez a2, .LBB16_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: mv a0, a1 +; RV32IZFH-NEXT: .LBB16_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: i32_select_fcmp_oeq: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: feq.h a2, fa0, fa1 +; RV64IZFH-NEXT: bnez a2, .LBB16_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: mv a0, a1 +; RV64IZFH-NEXT: .LBB16_2: +; RV64IZFH-NEXT: ret + %1 = fcmp oeq half %a, %b + %2 = select i1 %1, i32 %c, i32 %d + ret i32 %2 +} diff --git a/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s + +; This file provides a simple sanity check of half operations for +; RV32I and RV64I. This is primarily intended to ensure that custom +; legalisation or DAG combines aren't incorrectly triggered when the Zfh +; extension isn't enabled. 
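+;
+; As a rough sketch (illustrative comment only, not checked output), an
+; operation such as `fadd half %a, %b` is expected to legalise without Zfh by
+; promoting each half to float via __gnu_h2f_ieee, performing the arithmetic
+; with the soft-float libcalls, and truncating back via __gnu_f2h_ieee:
+;   float fa = __gnu_h2f_ieee(a);   /* promote half to float */
+;   float fb = __gnu_h2f_ieee(b);
+;   half  r  = __gnu_f2h_ieee(__addsf3(fa, fb));   /* add, truncate back */
+; half_test below exercises this for an fadd+fdiv chain; note the
+; intermediate result stays in float, so __gnu_f2h_ieee is emitted only once.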
+ +define half @half_test(half %a, half %b) nounwind { +; RV32I-LABEL: half_test: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: sw s0, 8(sp) +; RV32I-NEXT: sw s1, 4(sp) +; RV32I-NEXT: sw s2, 0(sp) +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi s0, a1, -1 +; RV32I-NEXT: and a0, a0, s0 +; RV32I-NEXT: call __gnu_h2f_ieee +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: and a0, s2, s0 +; RV32I-NEXT: call __gnu_h2f_ieee +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __addsf3 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: call __divsf3 +; RV32I-NEXT: call __gnu_f2h_ieee +; RV32I-NEXT: lw s2, 0(sp) +; RV32I-NEXT: lw s1, 4(sp) +; RV32I-NEXT: lw s0, 8(sp) +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: half_test: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) +; RV64I-NEXT: sd s0, 16(sp) +; RV64I-NEXT: sd s1, 8(sp) +; RV64I-NEXT: sd s2, 0(sp) +; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: lui a1, 16 +; RV64I-NEXT: addiw s0, a1, -1 +; RV64I-NEXT: and a0, a0, s0 +; RV64I-NEXT: call __gnu_h2f_ieee +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: and a0, s2, s0 +; RV64I-NEXT: call __gnu_h2f_ieee +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call __divsf3 +; RV64I-NEXT: call __gnu_f2h_ieee +; RV64I-NEXT: ld s2, 0(sp) +; RV64I-NEXT: ld s1, 8(sp) +; RV64I-NEXT: ld s0, 16(sp) +; RV64I-NEXT: ld ra, 24(sp) +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %1 = fadd half %a, %b + %2 = fdiv half %1, %b + ret half %2 +} diff --git a/llvm/test/CodeGen/RISCV/rv64f-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64f-half-convert.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64f-half-convert.ll @@ -0,0 +1,171 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f < %s | FileCheck %s -check-prefix=RV64IZFH + +; This file exhaustively checks half<->i32 conversions. In general, +; fcvt.l[u].h can be selected instead of fcvt.w[u].h because poison is +; generated for an fpto[s|u]i conversion if the result doesn't fit in the +; target type. 
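+;
+; A minimal sketch of that reasoning (illustrative, not checked output): for
+;   %1 = fptoui half %a to i32
+; the result is poison whenever the value does not fit in i32, so either of
+;   fcvt.wu.h a0, fa0, rtz  # 32-bit convert, result sign-extended into a0
+;   fcvt.lu.h a0, fa0, rtz  # 64-bit convert, same value whenever %1 is defined
+; is a legal lowering. The wider form already leaves a zero-extended value in
+; a0, which is why zext_fptoui below needs no masking after the convert.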
+ +define i32 @aext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: aext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = fptosi half %a to i32 + ret i32 %1 +} + +define signext i32 @sext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: sext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = fptosi half %a to i32 + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: zext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: slli a0, a0, 32 +; RV64IZFH-NEXT: srli a0, a0, 32 +; RV64IZFH-NEXT: ret + %1 = fptosi half %a to i32 + ret i32 %1 +} + +define i32 @aext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: aext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = fptoui half %a to i32 + ret i32 %1 +} + +define signext i32 @sext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: sext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = fptoui half %a to i32 + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: zext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = fptoui half %a to i32 + ret i32 %1 +} + +define i16 @bcvt_f16_to_aext_i16(half %a, half %b) nounwind { +; RV64IZFH-LABEL: bcvt_f16_to_aext_i16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fadd.h ft0, fa0, fa1 +; RV64IZFH-NEXT: fmv.x.h a0, ft0 +; RV64IZFH-NEXT: ret + %1 = fadd half %a, %b + %2 = bitcast half %1 to i16 + ret i16 %2 +} + +define signext i16 @bcvt_f16_to_sext_i16(half %a, half %b) nounwind { +; RV64IZFH-LABEL: bcvt_f16_to_sext_i16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fadd.h ft0, fa0, fa1 +; RV64IZFH-NEXT: fmv.x.h a0, ft0 +; RV64IZFH-NEXT: slli a0, a0, 48 +; RV64IZFH-NEXT: srai a0, a0, 48 +; RV64IZFH-NEXT: ret + %1 = fadd half %a, %b + %2 = bitcast half %1 to i16 + ret i16 %2 +} + +define zeroext i16 @bcvt_f16_to_zext_i16(half %a, half %b) nounwind { +; RV64IZFH-LABEL: bcvt_f16_to_zext_i16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fadd.h ft0, fa0, fa1 +; RV64IZFH-NEXT: fmv.x.h a0, ft0 +; RV64IZFH-NEXT: lui a1, 16 +; RV64IZFH-NEXT: addiw a1, a1, -1 +; RV64IZFH-NEXT: and a0, a0, a1 +; RV64IZFH-NEXT: ret + %1 = fadd half %a, %b + %2 = bitcast half %1 to i16 + ret i16 %2 +} + +define half @bcvt_i64_to_f16_via_i16(i64 %a, i64 %b) nounwind { +; RV64IZFH-LABEL: bcvt_i64_to_f16_via_i16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmv.h.x ft0, a0 +; RV64IZFH-NEXT: fmv.h.x ft1, a1 +; RV64IZFH-NEXT: fadd.h fa0, ft0, ft1 +; RV64IZFH-NEXT: ret + %1 = trunc i64 %a to i16 + %2 = trunc i64 %b to i16 + %3 = bitcast i16 %1 to half + %4 = bitcast i16 %2 to half + %5 = fadd half %3, %4 + ret half %5 +} + +define half @uitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFH-LABEL: uitofp_aext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = uitofp i32 %a to half + ret half %1 +} + +define half @uitofp_sext_i32_to_f16(i32 signext %a) nounwind { +; RV64IZFH-LABEL: uitofp_sext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = uitofp i32 %a to half + ret half %1 +} + +define half @uitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFH-LABEL: uitofp_zext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = uitofp i32 %a to half + ret half %1 +} + +define half 
@sitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFH-LABEL: sitofp_aext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret + %1 = sitofp i32 %a to half + ret half %1 +} + +define half @sitofp_sext_i32_to_f16(i32 signext %a) nounwind { +; RV64IZFH-LABEL: sitofp_sext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IZFH-NEXT: ret + %1 = sitofp i32 %a to half + ret half %1 +} + +define half @sitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFH-LABEL: sitofp_zext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret + %1 = sitofp i32 %a to half + ret half %1 +} diff --git a/llvm/test/CodeGen/RISCV/zfh-imm.ll b/llvm/test/CodeGen/RISCV/zfh-imm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zfh-imm.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -target-abi ilp32f -mattr=+experimental-zfh < %s \ +; RUN: | FileCheck --check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+experimental-zfh,+d < %s \ +; RUN: | FileCheck --check-prefix=RV32IDZFH %s +; RUN: llc -mtriple=riscv64 -target-abi lp64f -mattr=+experimental-zfh < %s \ +; RUN: | FileCheck --check-prefix=RV64IZFH %s +; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+experimental-zfh,+d < %s \ +; RUN: | FileCheck --check-prefix=RV64IDZFH %s + +define half @f16_positive_zero(half *%pf) nounwind { +; RV32IZFH-LABEL: f16_positive_zero: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fmv.h.x fa0, zero +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: f16_positive_zero: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fmv.h.x fa0, zero +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: f16_positive_zero: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fmv.h.x fa0, zero +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: f16_positive_zero: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fmv.h.x fa0, zero +; RV64IDZFH-NEXT: ret + ret half 0.0 +} + +define half @f16_negative_zero(half *%pf) nounwind { +; RV32IZFH-LABEL: f16_negative_zero: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_0) +; RV32IZFH-NEXT: flh fa0, %lo(.LCPI1_0)(a0) +; RV32IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: f16_negative_zero: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lui a0, %hi(.LCPI1_0) +; RV32IDZFH-NEXT: flh fa0, %lo(.LCPI1_0)(a0) +; RV32IDZFH-NEXT: ret +; +; RV64IZFH-LABEL: f16_negative_zero: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lui a0, %hi(.LCPI1_0) +; RV64IZFH-NEXT: flh fa0, %lo(.LCPI1_0)(a0) +; RV64IZFH-NEXT: ret +; +; RV64IDZFH-LABEL: f16_negative_zero: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lui a0, %hi(.LCPI1_0) +; RV64IDZFH-NEXT: flh fa0, %lo(.LCPI1_0)(a0) +; RV64IDZFH-NEXT: ret + ret half -0.0 +} diff --git a/llvm/test/MC/RISCV/rv32i-invalid.s b/llvm/test/MC/RISCV/rv32i-invalid.s --- a/llvm/test/MC/RISCV/rv32i-invalid.s +++ b/llvm/test/MC/RISCV/rv32i-invalid.s @@ -172,6 +172,7 @@ mul a4, ra, s0 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'M' (Integer Multiplication and Division) amomaxu.w s5, s4, (s3) # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'A' (Atomic Instructions) fadd.s ft0, ft1, ft2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'F' (Single-Precision Floating-Point) +fadd.h ft0, ft1, ft2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zfh' (Half-Precision Floating-Point) # Using floating point registers when integer registers are expected addi a2, ft0, 24 # CHECK: 
:[[@LINE]]:10: error: invalid operand for instruction diff --git a/llvm/test/MC/RISCV/rv32zfh-invalid.s b/llvm/test/MC/RISCV/rv32zfh-invalid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zfh-invalid.s @@ -0,0 +1,36 @@ +# RUN: not llvm-mc -triple riscv32 -mattr=+experimental-zfh < %s 2>&1 | \ +# RUN: FileCheck %s + +# Out of range immediates +## simm12 +flh ft1, -2049(a0) # CHECK: :[[@LINE]]:10: error: operand must be a symbol with %lo/%pcrel_lo/%tprel_lo modifier or an integer in the range [-2048, 2047] +fsh ft2, 2048(a1) # CHECK: :[[@LINE]]:10: error: operand must be a symbol with %lo/%pcrel_lo/%tprel_lo modifier or an integer in the range [-2048, 2047] + +# Memory operand not formatted correctly +flh ft1, a0, -200 # CHECK: :[[@LINE]]:14: error: invalid operand for instruction +fsh ft2, a1, 100 # CHECK: :[[@LINE]]:14: error: invalid operand for instruction + +# Invalid register names +flh ft15, 100(a0) # CHECK: :[[@LINE]]:5: error: invalid operand for instruction +flh ft1, 100(a10) # CHECK: :[[@LINE]]:14: error: expected register +fsgnjn.h fa100, fa2, fa3 # CHECK: :[[@LINE]]:10: error: invalid operand for instruction + +# Integer registers where FP regs are expected +fmv.x.h fs7, a2 # CHECK: :[[@LINE]]:9: error: invalid operand for instruction + +# FP registers where integer regs are expected +fmv.h.x a8, ft2 # CHECK: :[[@LINE]]:9: error: invalid operand for instruction + +# Rounding mode when a register is expected +fmadd.h f10, f11, f12, ree # CHECK: :[[@LINE]]:24: error: invalid operand for instruction + +# Invalid rounding modes +fmadd.h f10, f11, f12, f13, ree # CHECK: :[[@LINE]]:29: error: operand must be a valid floating point rounding mode mnemonic +fmsub.h f14, f15, f16, f17, 0 # CHECK: :[[@LINE]]:29: error: operand must be a valid floating point rounding mode mnemonic +fnmsub.h f18, f19, f20, f21, 0b111 # CHECK: :[[@LINE]]:30: error: operand must be a valid floating point rounding mode mnemonic + +# Integer registers where FP regs are expected +fadd.h a2, a1, a0 # CHECK: :[[@LINE]]:8: error: invalid operand for instruction + +# FP registers where integer regs are expected +fcvt.wu.h ft2, a1 # CHECK: :[[@LINE]]:11: error: invalid operand for instruction diff --git a/llvm/test/MC/RISCV/rv32zfh-valid.s b/llvm/test/MC/RISCV/rv32zfh-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zfh-valid.s @@ -0,0 +1,166 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zfh -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zfh -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+experimental-zfh < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zfh -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-OBJ,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zfh < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zfh -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-OBJ,CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: flh ft0, 12(a0) +# CHECK-ASM: encoding: [0x07,0x10,0xc5,0x00] +flh f0, 12(a0) +# CHECK-ASM-AND-OBJ: flh ft1, 4(ra) +# CHECK-ASM: encoding: [0x87,0x90,0x40,0x00] +flh f1, +4(ra) +# CHECK-ASM-AND-OBJ: flh ft2, -2048(a3) +# CHECK-ASM: encoding: [0x07,0x91,0x06,0x80] +flh f2, -2048(x13) +# CHECK-ASM-AND-OBJ: flh ft3, -2048(s1) +# CHECK-ASM: encoding: 
[0x87,0x91,0x04,0x80] +flh f3, %lo(2048)(s1) +# CHECK-ASM-AND-OBJ: flh ft4, 2047(s2) +# CHECK-ASM: encoding: [0x07,0x12,0xf9,0x7f] +flh f4, 2047(s2) +# CHECK-ASM-AND-OBJ: flh ft5, 0(s3) +# CHECK-ASM: encoding: [0x87,0x92,0x09,0x00] +flh f5, 0(s3) + +# CHECK-ASM-AND-OBJ: fsh ft6, 2047(s4) +# CHECK-ASM: encoding: [0xa7,0x1f,0x6a,0x7e] +fsh f6, 2047(s4) +# CHECK-ASM-AND-OBJ: fsh ft7, -2048(s5) +# CHECK-ASM: encoding: [0x27,0x90,0x7a,0x80] +fsh f7, -2048(s5) +# CHECK-ASM-AND-OBJ: fsh fs0, -2048(s6) +# CHECK-ASM: encoding: [0x27,0x10,0x8b,0x80] +fsh f8, %lo(2048)(s6) +# CHECK-ASM-AND-OBJ: fsh fs1, 999(s7) +# CHECK-ASM: encoding: [0xa7,0x93,0x9b,0x3e] +fsh f9, 999(s7) + +# CHECK-ASM-AND-OBJ: fmadd.h fa0, fa1, fa2, fa3, dyn +# CHECK-ASM: encoding: [0x43,0xf5,0xc5,0x6c] +fmadd.h f10, f11, f12, f13, dyn +# CHECK-ASM-AND-OBJ: fmsub.h fa4, fa5, fa6, fa7, dyn +# CHECK-ASM: encoding: [0x47,0xf7,0x07,0x8d] +fmsub.h f14, f15, f16, f17, dyn +# CHECK-ASM-AND-OBJ: fnmsub.h fs2, fs3, fs4, fs5, dyn +# CHECK-ASM: encoding: [0x4b,0xf9,0x49,0xad] +fnmsub.h f18, f19, f20, f21, dyn +# CHECK-ASM-AND-OBJ: fnmadd.h fs6, fs7, fs8, fs9, dyn +# CHECK-ASM: encoding: [0x4f,0xfb,0x8b,0xcd] +fnmadd.h f22, f23, f24, f25, dyn + +# CHECK-ASM-AND-OBJ: fadd.h fs10, fs11, ft8, dyn +# CHECK-ASM: encoding: [0x53,0xfd,0xcd,0x05] +fadd.h f26, f27, f28, dyn +# CHECK-ASM-AND-OBJ: fsub.h ft9, ft10, ft11, dyn +# CHECK-ASM: encoding: [0xd3,0x7e,0xff,0x0d] +fsub.h f29, f30, f31, dyn +# CHECK-ASM-AND-OBJ: fmul.h ft0, ft1, ft2, dyn +# CHECK-ASM: encoding: [0x53,0xf0,0x20,0x14] +fmul.h ft0, ft1, ft2, dyn +# CHECK-ASM-AND-OBJ: fdiv.h ft3, ft4, ft5, dyn +# CHECK-ASM: encoding: [0xd3,0x71,0x52,0x1c] +fdiv.h ft3, ft4, ft5, dyn +# CHECK-ASM-AND-OBJ: fsqrt.h ft6, ft7, dyn +# CHECK-ASM: encoding: [0x53,0xf3,0x03,0x5c] +fsqrt.h ft6, ft7, dyn +# CHECK-ASM-AND-OBJ: fsgnj.h fs1, fa0, fa1 +# CHECK-ASM: encoding: [0xd3,0x04,0xb5,0x24] +fsgnj.h fs1, fa0, fa1 +# CHECK-ASM-AND-OBJ: fsgnjn.h fa1, fa3, fa4 +# CHECK-ASM: encoding: [0xd3,0x95,0xe6,0x24] +fsgnjn.h fa1, fa3, fa4 +# CHECK-ASM-AND-OBJ: fsgnjx.h fa4, fa3, fa2 +# CHECK-ASM: encoding: [0x53,0xa7,0xc6,0x24] +fsgnjx.h fa4, fa3, fa2 +# CHECK-ASM-AND-OBJ: fmin.h fa5, fa6, fa7 +# CHECK-ASM: encoding: [0xd3,0x07,0x18,0x2d] +fmin.h fa5, fa6, fa7 +# CHECK-ASM-AND-OBJ: fmax.h fs2, fs3, fs4 +# CHECK-ASM: encoding: [0x53,0x99,0x49,0x2d] +fmax.h fs2, fs3, fs4 +# CHECK-ASM-AND-OBJ: fcvt.w.h a0, fs5, dyn +# CHECK-ASM: encoding: [0x53,0xf5,0x0a,0xc4] +fcvt.w.h a0, fs5, dyn +# CHECK-ASM-AND-OBJ: fcvt.wu.h a1, fs6, dyn +# CHECK-ASM: encoding: [0xd3,0x75,0x1b,0xc4] +fcvt.wu.h a1, fs6, dyn +# CHECK-ASM-AND-OBJ: fmv.x.h a2, fs7 +# CHECK-ASM: encoding: [0x53,0x86,0x0b,0xe4] +fmv.x.h a2, fs7 +# CHECK-ASM-AND-OBJ: feq.h a1, fs8, fs9 +# CHECK-ASM: encoding: [0xd3,0x25,0x9c,0xa5] +feq.h a1, fs8, fs9 +# CHECK-ASM-AND-OBJ: flt.h a2, fs10, fs11 +# CHECK-ASM: encoding: [0x53,0x16,0xbd,0xa5] +flt.h a2, fs10, fs11 +# CHECK-ASM-AND-OBJ: fle.h a3, ft8, ft9 +# CHECK-ASM: encoding: [0xd3,0x06,0xde,0xa5] +fle.h a3, ft8, ft9 +# CHECK-ASM-AND-OBJ: fclass.h a3, ft10 +# CHECK-ASM: encoding: [0xd3,0x16,0x0f,0xe4] +fclass.h a3, ft10 +# CHECK-ASM-AND-OBJ: fcvt.h.w ft11, a4, dyn +# CHECK-ASM: encoding: [0xd3,0x7f,0x07,0xd4] +fcvt.h.w ft11, a4, dyn +# CHECK-ASM-AND-OBJ: fcvt.h.wu ft0, a5, dyn +# CHECK-ASM: encoding: [0x53,0xf0,0x17,0xd4] +fcvt.h.wu ft0, a5, dyn +# CHECK-ASM-AND-OBJ: fmv.h.x ft1, a6 +# CHECK-ASM: encoding: [0xd3,0x00,0x08,0xf4] +fmv.h.x ft1, a6 + +# Rounding modes + +# CHECK-ASM-AND-OBJ: fmadd.h fa0, fa1, fa2, fa3, rne +# 
CHECK-ASM: encoding: [0x43,0x85,0xc5,0x6c] +fmadd.h f10, f11, f12, f13, rne +# CHECK-ASM-AND-OBJ: fmsub.h fa4, fa5, fa6, fa7, rtz +# CHECK-ASM: encoding: [0x47,0x97,0x07,0x8d] +fmsub.h f14, f15, f16, f17, rtz +# CHECK-ASM-AND-OBJ: fnmsub.h fs2, fs3, fs4, fs5, rdn +# CHECK-ASM: encoding: [0x4b,0xa9,0x49,0xad] +fnmsub.h f18, f19, f20, f21, rdn +# CHECK-ASM-AND-OBJ: fnmadd.h fs6, fs7, fs8, fs9, rup +# CHECK-ASM: encoding: [0x4f,0xbb,0x8b,0xcd] +fnmadd.h f22, f23, f24, f25, rup +# CHECK-ASM-AND-OBJ: fmadd.h fa0, fa1, fa2, fa3, rmm +# CHECK-ASM: encoding: [0x43,0xc5,0xc5,0x6c] +fmadd.h f10, f11, f12, f13, rmm +# CHECK-ASM-AND-OBJ: fmsub.h fa4, fa5, fa6, fa7 +# CHECK-ASM: encoding: [0x47,0xf7,0x07,0x8d] +fmsub.h f14, f15, f16, f17, dyn + +# CHECK-ASM-AND-OBJ: fadd.h fs10, fs11, ft8, rne +# CHECK-ASM: encoding: [0x53,0x8d,0xcd,0x05] +fadd.h f26, f27, f28, rne +# CHECK-ASM-AND-OBJ: fsub.h ft9, ft10, ft11, rtz +# CHECK-ASM: encoding: [0xd3,0x1e,0xff,0x0d] +fsub.h f29, f30, f31, rtz +# CHECK-ASM-AND-OBJ: fmul.h ft0, ft1, ft2, rdn +# CHECK-ASM: encoding: [0x53,0xa0,0x20,0x14] +fmul.h ft0, ft1, ft2, rdn +# CHECK-ASM-AND-OBJ: fdiv.h ft3, ft4, ft5, rup +# CHECK-ASM: encoding: [0xd3,0x31,0x52,0x1c] +fdiv.h ft3, ft4, ft5, rup + +# CHECK-ASM-AND-OBJ: fsqrt.h ft6, ft7, rmm +# CHECK-ASM: encoding: [0x53,0xc3,0x03,0x5c] +fsqrt.h ft6, ft7, rmm +# CHECK-ASM-AND-OBJ: fcvt.w.h a0, fs5, rup +# CHECK-ASM: encoding: [0x53,0xb5,0x0a,0xc4] +fcvt.w.h a0, fs5, rup +# CHECK-ASM-AND-OBJ: fcvt.wu.h a1, fs6, rdn +# CHECK-ASM: encoding: [0xd3,0x25,0x1b,0xc4] +fcvt.wu.h a1, fs6, rdn +# CHECK-ASM-AND-OBJ: fcvt.h.w ft11, a4, rtz +# CHECK-ASM: encoding: [0xd3,0x1f,0x07,0xd4] +fcvt.h.w ft11, a4, rtz +# CHECK-ASM-AND-OBJ: fcvt.h.wu ft0, a5, rne +# CHECK-ASM: encoding: [0x53,0x80,0x17,0xd4] +fcvt.h.wu ft0, a5, rne diff --git a/llvm/test/MC/RISCV/rv64zfh-invalid.s b/llvm/test/MC/RISCV/rv64zfh-invalid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zfh-invalid.s @@ -0,0 +1,10 @@ +# RUN: not llvm-mc -triple riscv64 -mattr=+experimental-zfh < %s 2>&1 | \ +# RUN: FileCheck %s + +# Integer registers where FP regs are expected +fcvt.l.h ft0, a0 # CHECK: :[[@LINE]]:10: error: invalid operand for instruction +fcvt.lu.h ft1, a1 # CHECK: :[[@LINE]]:11: error: invalid operand for instruction + +# FP registers where integer regs are expected +fcvt.h.l a2, ft2 # CHECK: :[[@LINE]]:10: error: invalid operand for instruction +fcvt.h.lu a3, ft3 # CHECK: :[[@LINE]]:11: error: invalid operand for instruction diff --git a/llvm/test/MC/RISCV/rv64zfh-valid.s b/llvm/test/MC/RISCV/rv64zfh-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rv64zfh-valid.s @@ -0,0 +1,39 @@ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zfh -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zfh < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zfh -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-OBJ,CHECK-ASM-AND-OBJ %s +# +# RUN: not llvm-mc -triple riscv32 -mattr=+experimental-zfh < %s 2>&1 \ +# RUN: | FileCheck -check-prefix=CHECK-RV32 %s + +# CHECK-ASM-AND-OBJ: fcvt.l.h a0, ft0, dyn +# CHECK-ASM: encoding: [0x53,0x75,0x20,0xc4] +# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set +fcvt.l.h a0, ft0, dyn +# CHECK-ASM-AND-OBJ: fcvt.lu.h a1, ft1, dyn +# CHECK-ASM: encoding: [0xd3,0xf5,0x30,0xc4] +# CHECK-RV32: :[[@LINE+1]]:1: error: instruction 
requires the following: RV64I Base Instruction Set +fcvt.lu.h a1, ft1, dyn +# CHECK-ASM-AND-OBJ: fcvt.h.l ft2, a2, dyn +# CHECK-ASM: encoding: [0x53,0x71,0x26,0xd4] +# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set +fcvt.h.l ft2, a2, dyn +# CHECK-ASM-AND-OBJ: fcvt.h.lu ft3, a3, dyn +# CHECK-ASM: encoding: [0xd3,0xf1,0x36,0xd4] +# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set +fcvt.h.lu ft3, a3, dyn + +# Rounding modes +# CHECK-ASM-AND-OBJ: fcvt.l.h a4, ft4, rne +# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set +fcvt.l.h a4, ft4, rne +# CHECK-ASM-AND-OBJ: fcvt.lu.h a5, ft5, rtz +# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set +fcvt.lu.h a5, ft5, rtz +# CHECK-ASM-AND-OBJ: fcvt.h.l ft6, a6, rdn +# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set +fcvt.h.l ft6, a6, rdn +# CHECK-ASM-AND-OBJ: fcvt.h.lu ft7, a7, rup +# CHECK-RV32: :[[@LINE+1]]:1: error: instruction requires the following: RV64I Base Instruction Set +fcvt.h.lu ft7, a7, rup diff --git a/llvm/test/MC/RISCV/rvzfh-aliases-valid.s b/llvm/test/MC/RISCV/rvzfh-aliases-valid.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rvzfh-aliases-valid.s @@ -0,0 +1,99 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zfh -riscv-no-aliases \ +# RUN: | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zfh \ +# RUN: | FileCheck -check-prefix=CHECK-ALIAS %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zfh -riscv-no-aliases \ +# RUN: | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zfh \ +# RUN: | FileCheck -check-prefix=CHECK-ALIAS %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+experimental-zfh < %s \ +# RUN: | llvm-objdump -d --mattr=+experimental-zfh -M no-aliases - \ +# RUN: | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+experimental-zfh < %s \ +# RUN: | llvm-objdump -d --mattr=+experimental-zfh - \ +# RUN: | FileCheck -check-prefix=CHECK-ALIAS %s +# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+experimental-zfh < %s \ +# RUN: | llvm-objdump -d --mattr=+experimental-zfh -M no-aliases - \ +# RUN: | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+experimental-zfh < %s \ +# RUN: | llvm-objdump -d --mattr=+experimental-zfh - \ +# RUN: | FileCheck -check-prefix=CHECK-ALIAS %s + +##===----------------------------------------------------------------------===## +## Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) +##===----------------------------------------------------------------------===## + +# CHECK-INST: fsgnj.h ft0, ft1, ft1 +# CHECK-ALIAS: fmv.h ft0, ft1 +fmv.h f0, f1 +# CHECK-INST: fsgnjx.h ft1, ft2, ft2 +# CHECK-ALIAS: fabs.h ft1, ft2 +fabs.h f1, f2 +# CHECK-INST: fsgnjn.h ft2, ft3, ft3 +# CHECK-ALIAS: fneg.h ft2, ft3 +fneg.h f2, f3 + +# CHECK-INST: flt.h tp, ft6, ft5 +# CHECK-ALIAS: flt.h tp, ft6, ft5 +fgt.h x4, f5, f6 +# CHECK-INST: fle.h t2, fs1, fs0 +# CHECK-ALIAS: fle.h t2, fs1, fs0 +fge.h x7, f8, f9 + +# CHECK-INST: fmv.x.h a2, fs7 +# CHECK-ALIAS: fmv.x.h a2, fs7 +fmv.x.h a2, fs7 +# CHECK-INST: fmv.h.x ft1, a6 +# CHECK-ALIAS: fmv.h.x ft1, a6 +fmv.h.x ft1, a6 + +# CHECK-INST: flh ft0, 0(a0) +# CHECK-ALIAS: flh ft0, 0(a0) +flh f0, (x10) +# 
CHECK-INST: fsh ft0, 0(a0) +# CHECK-ALIAS: fsh ft0, 0(a0) +fsh f0, (x10) + +##===----------------------------------------------------------------------===## +## Aliases which omit the rounding mode. +##===----------------------------------------------------------------------===## + +# CHECK-INST: fmadd.h fa0, fa1, fa2, fa3, dyn +# CHECK-ALIAS: fmadd.h fa0, fa1, fa2, fa3{{[[:space:]]}} +fmadd.h f10, f11, f12, f13 +# CHECK-INST: fmsub.h fa4, fa5, fa6, fa7, dyn +# CHECK-ALIAS: fmsub.h fa4, fa5, fa6, fa7{{[[:space:]]}} +fmsub.h f14, f15, f16, f17 +# CHECK-INST: fnmsub.h fs2, fs3, fs4, fs5, dyn +# CHECK-ALIAS: fnmsub.h fs2, fs3, fs4, fs5{{[[:space:]]}} +fnmsub.h f18, f19, f20, f21 +# CHECK-INST: fnmadd.h fs6, fs7, fs8, fs9, dyn +# CHECK-ALIAS: fnmadd.h fs6, fs7, fs8, fs9{{[[:space:]]}} +fnmadd.h f22, f23, f24, f25 +# CHECK-INST: fadd.h fs10, fs11, ft8, dyn +# CHECK-ALIAS: fadd.h fs10, fs11, ft8{{[[:space:]]}} +fadd.h f26, f27, f28 +# CHECK-INST: fsub.h ft9, ft10, ft11, dyn +# CHECK-ALIAS: fsub.h ft9, ft10, ft11{{[[:space:]]}} +fsub.h f29, f30, f31 +# CHECK-INST: fmul.h ft0, ft1, ft2, dyn +# CHECK-ALIAS: fmul.h ft0, ft1, ft2{{[[:space:]]}} +fmul.h ft0, ft1, ft2 +# CHECK-INST: fdiv.h ft3, ft4, ft5, dyn +# CHECK-ALIAS: fdiv.h ft3, ft4, ft5{{[[:space:]]}} +fdiv.h ft3, ft4, ft5 +# CHECK-INST: fsqrt.h ft6, ft7, dyn +# CHECK-ALIAS: fsqrt.h ft6, ft7{{[[:space:]]}} +fsqrt.h ft6, ft7 +# CHECK-INST: fcvt.w.h a0, fs5, dyn +# CHECK-ALIAS: fcvt.w.h a0, fs5{{[[:space:]]}} +fcvt.w.h a0, fs5 +# CHECK-INST: fcvt.wu.h a1, fs6, dyn +# CHECK-ALIAS: fcvt.wu.h a1, fs6{{[[:space:]]}} +fcvt.wu.h a1, fs6 +# CHECK-INST: fcvt.h.w ft11, a4, dyn +# CHECK-ALIAS: fcvt.h.w ft11, a4{{[[:space:]]}} +fcvt.h.w ft11, a4 +# CHECK-INST: fcvt.h.wu ft0, a5, dyn +# CHECK-ALIAS: fcvt.h.wu ft0, a5{{[[:space:]]}} +fcvt.h.wu ft0, a5 diff --git a/llvm/test/MC/RISCV/rvzfh-pseudos.s b/llvm/test/MC/RISCV/rvzfh-pseudos.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/RISCV/rvzfh-pseudos.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zfh | FileCheck %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zfh | FileCheck %s + +# CHECK: .Lpcrel_hi0: +# CHECK: auipc a2, %pcrel_hi(a_symbol) +# CHECK: flh fa2, %pcrel_lo(.Lpcrel_hi0)(a2) +flh fa2, a_symbol, a2 + +# CHECK: .Lpcrel_hi1: +# CHECK: auipc a3, %pcrel_hi(a_symbol) +# CHECK: fsh fa2, %pcrel_lo(.Lpcrel_hi1)(a3) +fsh fa2, a_symbol, a3