Index: lib/Target/Mips/MipsMSAInstrInfo.td =================================================================== --- lib/Target/Mips/MipsMSAInstrInfo.td +++ lib/Target/Mips/MipsMSAInstrInfo.td @@ -3731,6 +3731,56 @@ def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE; +// Pseudos used to implement transparent fp16 support. + +let Predicates = [HasMSA] in { + def ST_F16 : MipsPseudo<(outs), (ins MSA128F16:$ws, mem_simm10:$addr), + [(store (f16 MSA128F16:$ws), (addrimm10:$addr))]> { + let usesCustomInserter = 1; + } + + def LD_F16 : MipsPseudo<(outs MSA128F16:$ws), (ins mem_simm10:$addr), + [(set MSA128F16:$ws, (f16 (load addrimm10:$addr)))]> { + let usesCustomInserter = 1; + } + + def MSA_FP_EXTEND_W_PSEUDO : MipsPseudo<(outs FGR32Opnd:$fd), + (ins MSA128F16:$ws), + [(set FGR32Opnd:$fd, + (f32 (fpextend MSA128F16:$ws)))]> { + let usesCustomInserter = 1; + } + + def MSA_FP_ROUND_W_PSEUDO : MipsPseudo<(outs MSA128F16:$wd), + (ins FGR32Opnd:$fs), + [(set MSA128F16:$wd, + (f16 (fpround FGR32Opnd:$fs)))]> { + let usesCustomInserter = 1; + } + + def MSA_FP_EXTEND_D_PSEUDO : MipsPseudo<(outs FGR64Opnd:$fd), + (ins MSA128F16:$ws), + [(set FGR64Opnd:$fd, + (f64 (fpextend MSA128F16:$ws)))]> { + let usesCustomInserter = 1; + } + + def MSA_FP_ROUND_D_PSEUDO : MipsPseudo<(outs MSA128F16:$wd), + (ins FGR64Opnd:$fs), + [(set MSA128F16:$wd, + (f16 (fpround FGR64Opnd:$fs)))]> { + let usesCustomInserter = 1; + } + + def : MipsPat<(MipsTruncIntFP MSA128F16:$ws), + (TRUNC_W_D64 (MSA_FP_EXTEND_D_PSEUDO MSA128F16:$ws))>; + + def : MipsPat<(MipsFPCmp MSA128F16:$ws, MSA128F16:$wt, imm:$cond), + (FCMP_S32 (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$ws), + (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$wt), imm:$cond)>, + ISA_MIPS1_NOT_32R6_64R6; +} + // Vector extraction with fixed index. 
// // Extracting 32-bit values on MSA32 should always use COPY_S_W rather than Index: lib/Target/Mips/MipsRegisterInfo.td =================================================================== --- lib/Target/Mips/MipsRegisterInfo.td +++ lib/Target/Mips/MipsRegisterInfo.td @@ -400,6 +400,8 @@ // This class allows us to represent this in codegen patterns. def FGRCC : RegisterClass<"Mips", [i32], 32, (sequence "F%u", 0, 31)>; +def MSA128F16 : RegisterClass<"Mips", [f16], 128, (sequence "W%u", 0, 31)>; + def MSA128B: RegisterClass<"Mips", [v16i8], 128, (sequence "W%u", 0, 31)>; def MSA128H: RegisterClass<"Mips", [v8i16, v8f16], 128, @@ -646,6 +648,10 @@ let ParserMatchClass = COP3AsmOperand; } +def MSA128F16Opnd : RegisterOperand<MSA128F16> { + let ParserMatchClass = MSA128AsmOperand; +} + def MSA128BOpnd : RegisterOperand<MSA128B> { let ParserMatchClass = MSA128AsmOperand; } Index: lib/Target/Mips/MipsSEISelLowering.h =================================================================== --- lib/Target/Mips/MipsSEISelLowering.h +++ lib/Target/Mips/MipsSEISelLowering.h @@ -111,6 +111,20 @@ /// \brief Emit the FEXP2_D_1 pseudo instructions. MachineBasicBlock *emitFEXP2_D_1(MachineInstr &MI, MachineBasicBlock *BB) const; + /// \brief Emit the LD_F16 pseudo instruction. + MachineBasicBlock *emitLD_F16_PSEUDO(MachineInstr &MI, + MachineBasicBlock *BB) const; + /// \brief Emit the ST_F16 pseudo instruction. + MachineBasicBlock *emitST_F16_PSEUDO(MachineInstr &MI, + MachineBasicBlock *BB) const; + /// \brief Emit the MSA_FP_EXTEND_{W,D} pseudo instructions. + MachineBasicBlock *emitFPEXTEND_PSEUDO(MachineInstr &MI, + MachineBasicBlock *BB, + bool IsFGR64) const; + /// \brief Emit the MSA_FP_ROUND_{W,D} pseudo instructions. 
+ MachineBasicBlock *emitFPROUND_PSEUDO(MachineInstr &MI, + MachineBasicBlock *BB, + bool IsFGR64) const; }; } Index: lib/Target/Mips/MipsSEISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsSEISelLowering.cpp +++ lib/Target/Mips/MipsSEISelLowering.cpp @@ -92,6 +92,44 @@ addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); + // f16 is a storage-only type, always promote it to f32. + addRegisterClass(MVT::f16, &Mips::MSA128HRegClass); + setOperationAction(ISD::SETCC, MVT::f16, Promote); + setOperationAction(ISD::BR_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); + setOperationAction(ISD::SELECT, MVT::f16, Promote); + setOperationAction(ISD::FADD, MVT::f16, Promote); + setOperationAction(ISD::FSUB, MVT::f16, Promote); + setOperationAction(ISD::FMUL, MVT::f16, Promote); + setOperationAction(ISD::FDIV, MVT::f16, Promote); + setOperationAction(ISD::FREM, MVT::f16, Promote); + setOperationAction(ISD::FMA, MVT::f16, Promote); + setOperationAction(ISD::FNEG, MVT::f16, Promote); + setOperationAction(ISD::FABS, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FSQRT, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); +
setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FMINNUM, MVT::f16, Promote); + setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::FMINNAN, MVT::f16, Promote); + setOperationAction(ISD::FMAXNAN, MVT::f16, Promote); + setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::SRA); @@ -1172,6 +1210,18 @@ return emitFEXP2_W_1(MI, BB); case Mips::FEXP2_D_1_PSEUDO: return emitFEXP2_D_1(MI, BB); + case Mips::ST_F16: + return emitST_F16_PSEUDO(MI, BB); + case Mips::LD_F16: + return emitLD_F16_PSEUDO(MI, BB); + case Mips::MSA_FP_EXTEND_W_PSEUDO: + return emitFPEXTEND_PSEUDO(MI, BB, false); + case Mips::MSA_FP_ROUND_W_PSEUDO: + return emitFPROUND_PSEUDO(MI, BB, false); + case Mips::MSA_FP_EXTEND_D_PSEUDO: + return emitFPEXTEND_PSEUDO(MI, BB, true); + case Mips::MSA_FP_ROUND_D_PSEUDO: + return emitFPROUND_PSEUDO(MI, BB, true); } } @@ -3372,6 +3422,304 @@ return BB; } +// Emit the ST_F16 pseudo instruction to store an f16 value from an MSA +// register. +// +// ST_F16 MSA128F16:$ws, mem_simm10:$addr +// => +// copy_u.h $rtemp, $ws[0] +// sh $rtemp, $addr +// +// Safety: We can't use st.h & co as they would overwrite the memory after +// the destination. It would require half floats be allocated 16 bytes(!) of +// space. 
+MachineBasicBlock * +MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, + MachineBasicBlock *BB) const { + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Ws = MI.getOperand(0).getReg(); + unsigned Rt = MI.getOperand(1).getReg(); + const MachineMemOperand &MMO = **MI.memoperands_begin(); + unsigned Imm = MMO.getOffset(); + + // Caution: A load via the GOT can expand to a GPR32 operand, a load via + // spill and reload can expand as a GPR64 operand. Examine the + // operand in detail and default to ABI. NOTE(review): Rt above already calls getReg() on operand 1 before this isReg() check, and Imm is taken from the memory operand's offset rather than MI's immediate operand; confirm both are intended. + const TargetRegisterClass *RC = + MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) + : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass + : &Mips::GPR64RegClass); + const bool UsingMips32 = RC == &Mips::GPR32RegClass; + unsigned Rs = RegInfo.createVirtualRegister(RC); + + BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); + BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64)) + .addReg(Rs) + .addReg(Rt) + .addImm(Imm) + .addMemOperand(BB->getParent()->getMachineMemOperand( + &MMO, MMO.getOffset(), MMO.getSize())); + + MI.eraseFromParent(); + return BB; +} + +// Emit the LD_F16 pseudo instruction to load an f16 value into an MSA register. +// +// LD_F16 MSA128F16:$wd, mem_simm10:$addr +// => +// lh $rtemp, $addr +// fill.h $wd, $rtemp +// +// Safety: We can't use ld.h & co as they over-read from the source. +// Additionally, if the address is not modulo 16, 2 cases can occur: +// a) Segmentation fault as the load instruction reads from a memory page +// it's not supposed to. +// b) The load crosses an implementation specific boundary, requiring OS +// intervention. 
+// +MachineBasicBlock * +MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, + MachineBasicBlock *BB) const { + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Wd = MI.getOperand(0).getReg(); + + // Caution: A load via the GOT can expand to a GPR32 operand, a load via + // spill and reload can expand as a GPR64 operand. Examine the + // operand in detail and default to ABI. + const TargetRegisterClass *RC = + MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) + : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass + : &Mips::GPR64RegClass); + + const bool UsingMips32 = RC == &Mips::GPR32RegClass; + unsigned Rt = RegInfo.createVirtualRegister(RC); + + MachineInstrBuilder MIB = + BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt); + for (unsigned i = 1; i < MI.getNumOperands(); i++) + MIB.addOperand(MI.getOperand(i)); + + BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt); + + MI.eraseFromParent(); + return BB; +} + +// Emit the FPROUND_PSEUDO instruction. +// +// Round an FGR64Opnd or FGR32Opnd to an f16. +// +// Safety: Cycle the operand through the GPRs so the result always ends up +// in the correct MSA register. +// +// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs +// / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register +// (which they can be, as the MSA registers are defined to alias the +// FPU's 64 bit and 32 bit registers) the result can be accessed using +// the correct register class. That requires operands be tie-able across +// register classes which have a sub/super register class relationship. 
+// +// For FGR32Opnd: +// +// FPROUND MSA128F16:$wd, FGR32Opnd:$fs +// => +// mfc1 $rtemp, $fs +// fill.w $wtemp, $rtemp +// fexdo.h $wd, $wtemp, $wtemp +// +// For FGR64Opnd on mips32r2+: +// +// FPROUND MSA128F16:$wd, FGR64Opnd:$fs +// => +// mfc1 $rtemp, $fs +// fill.w $wtemp, $rtemp +// mfhc1 $rtemp2, $fs +// insert.w $wtemp[1], $rtemp2 +// insert.w $wtemp[3], $rtemp2 +// fexdo.w $wtemp2, $wtemp, $wtemp +// fexdo.h $wd, $wtemp2, $wtemp2 +// +// For FGR64Opnd on mips64r2+: +// +// FPROUND MSA128F16:$wd, FGR64Opnd:$fs +// => +// dmfc1 $rtemp, $fs +// fill.d $wtemp, $rtemp +// fexdo.w $wtemp2, $wtemp, $wtemp +// fexdo.h $wd, $wtemp2, $wtemp2 +// +// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the +// undef bits are "just right" and the exception enable bits are +// set. By using fill.w to replicate $fs into all elements over +// insert.w for one element, we avoid that potential case. If +// fexdo.[hw] causes an exception, the exception is valid and it +// occurs for all elements. +// +MachineBasicBlock * +MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, + MachineBasicBlock *BB, + bool IsFGR64) const { + + // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous + // here. It's technically doable to support MIPS32 here, but the ISA forbids + // it. + assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); + + bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Wd = MI.getOperand(0).getReg(); + unsigned Fs = MI.getOperand(1).getReg(); + + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + const TargetRegisterClass *GPRRC = + IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; + unsigned MFC1Opc = IsFGR64onMips64 ? Mips::DMFC1 : Mips::MFC1; + unsigned FILLOpc = IsFGR64onMips64 ? 
Mips::FILL_D : Mips::FILL_W; + + // Perform the register class copy as mentioned above. + unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); + BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs); + BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); + unsigned WPHI = Wtemp; + + if (!Subtarget.hasMips64() && IsFGR64) { + unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); + BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); + unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2) + .addReg(Wtemp) + .addReg(Rtemp2) + .addImm(1); + BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3) + .addReg(Wtemp2) + .addReg(Rtemp2) + .addImm(3); + WPHI = Wtemp3; + } + + if (IsFGR64) { + unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2) + .addReg(WPHI) + .addReg(WPHI); + WPHI = Wtemp2; + } + + BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI); + + MI.eraseFromParent(); + return BB; +} + +// Emit the FPEXTEND_PSEUDO instruction. +// +// Expand an f16 to either a FGR32Opnd or FGR64Opnd. +// +// Safety: Cycle the result through the GPRs so the result always ends up +// in the correct floating point register. +// +// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd +// / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register +// (which they can be, as the MSA registers are defined to alias the +// FPU's 64 bit and 32 bit registers) the result can be accessed using +// the correct register class. That requires operands be tie-able across +// register classes which have a sub/super register class relationship. I +// haven't checked. 
+// +// For FGR32Opnd: +// +// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws +// => +// fexupr.w $wtemp, $ws +// copy_s.w $rtemp, $wtemp[0] +// mtc1 $rtemp, $fd +// +// For FGR64Opnd on Mips64: +// +// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws +// => +// fexupr.w $wtemp, $ws +// fexupr.d $wtemp2, $wtemp +// copy_s.d $rtemp, $wtemp2[0] +// dmtc1 $rtemp, $fd +// +// For FGR64Opnd on Mips32: +// +// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws +// => +// fexupr.w $wtemp, $ws +// fexupr.d $wtemp2, $wtemp +// copy_s.w $rtemp, $wtemp2[0] +// mtc1 $rtemp, $ftemp +// copy_s.w $rtemp2, $wtemp2[1] +// $fd = mthc1 $rtemp2, $ftemp +// +MachineBasicBlock * +MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, + MachineBasicBlock *BB, + bool IsFGR64) const { + + // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous + // here. It's technically doable to support MIPS32 here, but the ISA forbids + // it. + assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); + + bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; + bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + unsigned Fd = MI.getOperand(0).getReg(); + unsigned Ws = MI.getOperand(1).getReg(); + + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetRegisterClass *GPRRC = + IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; + unsigned MTC1Opc = IsFGR64onMips64 ? Mips::DMTC1 : Mips::MTC1; + unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; + + unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + unsigned WPHI = Wtemp; + + BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws); + if (IsFGR64) { + WPHI = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); + BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp); + } + + // Perform the safety regclass copy mentioned above. 
+ unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC); + unsigned FPRPHI = IsFGR64onMips32 + ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass) + : Fd; + BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0); + BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp); + + if (IsFGR64onMips32) { + unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC); + BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2) + .addReg(WPHI) + .addImm(1); + BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd) + .addReg(FPRPHI) + .addReg(Rtemp2); + } + + MI.eraseFromParent(); + return BB; +} + // Emit the FEXP2_W_1 pseudo instructions. // // fexp2_w_1_pseudo $wd, $wt Index: test/CodeGen/Mips/msa/f16-llvm-ir.ll =================================================================== --- /dev/null +++ test/CodeGen/Mips/msa/f16-llvm-ir.ll @@ -0,0 +1,1147 @@ +; RUN: llc -relocation-model=pic -march=mipsel -mcpu=mips32r5 \ +; RUN: -mattr=+fp64,+msa < %s | FileCheck %s \ +; RUN: --check-prefixes=ALL,MIPS32,MIPSR5,MIPS32-O32,MIPS32R5-O32 +; RUN: llc -relocation-model=pic -march=mips64el -mcpu=mips64r5 \ +; RUN: -mattr=+fp64,+msa -target-abi n32 < %s | FileCheck %s \ +; RUN: --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N32,MIPS64R5-N32 +; RUN: llc -relocation-model=pic -march=mips64el -mcpu=mips64r5 \ +; RUN: -mattr=+fp64,+msa -target-abi n64 < %s | FileCheck %s \ +; RUN: --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N64,MIPS64R5-N64 + +; RUN: llc -relocation-model=pic -march=mipsel -mcpu=mips32r6 \ +; RUN: -mattr=+fp64,+msa < %s | FileCheck %s \ +; RUN: --check-prefixes=ALL,MIPS32,MIPSR6,MIPSR6-O32 +; RUN: llc -relocation-model=pic -march=mips64el -mcpu=mips64r6 \ +; RUN: -mattr=+fp64,+msa -target-abi n32 < %s | FileCheck %s \ +; RUN: --check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N32,MIPSR6-N32 +; RUN: llc -relocation-model=pic -march=mips64el -mcpu=mips64r6 \ +; RUN: -mattr=+fp64,+msa -target-abi n64 < %s | FileCheck %s \ +; RUN: 
--check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N64,MIPSR6-N64 + + +; Check the use of frame indexes in the msa pseudo f16 instructions. + +@k = external global float + +declare float @k2(half *) + +define void @f3(i16 %b) { +entry: +; ALL-LABEL: f3: + +; ALL: sh $4, [[O0:[0-9]+]]($sp) +; ALL-DAG: jalr $25 +; MIPS32-DAG: addiu $4, $sp, [[O0]] +; MIPS64-N32: addiu $4, $sp, [[O0]] +; MIPS64-N64: daddiu $4, $sp, [[O0]] +; ALL: swc1 $f0 + + %0 = alloca half + %1 = bitcast i16 %b to half + store half %1, half * %0 + %2 = call float @k2(half * %0) + store float %2, float * @k + ret void +} + +define void @f(i16 %b) { +; ALL-LABEL: f: + +; ALL: sh $4, [[O0:[0-9]+]]($sp) +; ALL: lh $[[R0:[0-9]+]], [[O0]]($sp) +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; ALL: swc1 $f[[F0]] + + %1 = bitcast i16 %b to half + %2 = fpext half %1 to float + store float %2, float * @k + ret void +} + +@g = external global i16, align 2 +@h = external global half, align 2 + +; Check that fext f16 to double has a fexupr.w, fexupr.d sequence. +; Check that ftrunc double to f16 has fexdo.w, fexdo.h sequence. +; Check that MIPS64R5+ uses 64-bit floating point <-> 64-bit GPR transfers. + +; We don't need to check if pre-MIPSR5 expansions occur, the MSA ASE requires +; MIPSR5. Additionally, fp64 mode / FR=1 is required to use MSA. 
+ +define void @fadd_f64() { +entry: +; ALL-LABEL: fadd_f64: + %0 = load half, half * @h, align 2 + %1 = fpext half %0 to double +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: fexupr.d $w[[W2:[0-9]+]], $w[[W1]] +; MIPS32: copy_s.w $[[R1:[0-9]+]], $w[[W2]][0] +; MIPS32: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32: copy_s.w $[[R2:[0-9]+]], $w[[W2]][1] +; MIPS32: mthc1 $[[R2]], $f[[F0]] +; MIPS64: copy_s.d $[[R2:[0-9]+]], $w[[W2]][0] +; MIPS64: dmtc1 $[[R2]], $f[[F0:[0-9]+]] + + %2 = load half, half * @h, align 2 + %3 = fpext half %2 to double + %add = fadd double %1, %3 + +; ALL: add.d $f[[F1:[0-9]+]], $f[[F0]], $f[[F0]] + + %4 = fptrunc double %add to half + +; MIPS32: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; MIPS32: fill.w $w[[W2:[0-9]+]], $[[R2]] +; MIPS32: mfhc1 $[[R3:[0-9]+]], $f[[F1]] +; MIPS32: insert.w $w[[W2]][1], $[[R3]] +; MIPS32: insert.w $w[[W2]][3], $[[R3]] + +; MIPS64: dmfc1 $[[R2:[0-9]+]], $f[[F1]] +; MIPS64: fill.d $w[[W2:[0-9]+]], $[[R2]] + +; ALL: fexdo.w $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: fexdo.h $w[[W4:[0-9]+]], $w[[W3]], $w[[W3]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W4]][0] +; ALL: sh $[[R3]] + store half %4, half * @h, align 2 + ret void +} + +define i32 @ffptoui() { +entry: +; ALL-LABEL: ffptoui: + %0 = load half, half * @h, align 2 + %1 = fptoui half %0 to i32 + +; MIPS32: lwc1 $f[[FC:[0-9]+]], %lo($CPI{{[0-9]+}}_{{[0-9]+}}) +; MIPS64-N32: lwc1 $f[[FC:[0-9]+]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) +; MIPS64-N64: lwc1 $f[[FC:[0-9]+]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPSR6: cmp.lt.s $f[[F1:[0-9]+]], $f[[F0]], $f[[FC]] +; ALL: sub.s $f[[F2:[0-9]+]], $f[[F0]], $f[[FC]] +; ALL: mfc1 $[[R2:[0-9]]], $f[[F2]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], 
$w[[W2]], $w[[W2]] +; ALL: fexupr.w $w[[W4:[0-9]+]], $w[[W3]] +; ALL: fexupr.d $w[[W5:[0-9]+]], $w[[W4]] + +; MIPS32: copy_s.w $[[R3:[0-9]+]], $w[[W5]][0] +; MIPS32: mtc1 $[[R3]], $f[[F3:[0-9]+]] +; MIPS32: copy_s.w $[[R4:[0-9]+]], $w[[W5]][1] +; MIPS32: mthc1 $[[R3]], $f[[F3]] + +; MIPS64: copy_s.d $[[R2:[0-9]+]], $w[[W2]][0] +; MIPS64: dmtc1 $[[R2]], $f[[F3:[0-9]+]] + +; ALL: trunc.w.d $f[[F4:[0-9]+]], $f[[F3]] +; ALL: mfc1 $[[R4:[0-9]+]], $f[[F4]] +; ALL: fexupr.d $w[[W6:[0-9]+]], $w[[W1]] + +; MIPS32: copy_s.w $[[R5:[0-9]+]], $w[[W6]][0] +; MIPS32: mtc1 $[[R5]], $f[[F5:[0-9]+]] +; MIPS32: copy_s.w $[[R6:[0-9]+]], $w[[W6]][1] +; MIPS32: mthc1 $[[R6]], $f[[F5]] + +; MIPS64: copy_s.d $[[R2:[0-9]+]], $w[[W2]][0] +; MIPS64: dmtc1 $[[R2]], $f[[F5:[0-9]+]] + +; ALL: trunc.w.d $f[[F6:[0-9]]], $f[[F5]] +; ALL: mfc1 $[[R7:[0-9]]], $f[[F6]] + +; MIPS32R5-O32: lw $[[R13:[0-9]+]], %got($CPI{{[0-9]+}}_{{[0-9]+}}) +; MIPS32R5-O32: addiu $[[R14:[0-9]+]], $[[R13]], %lo($CPI{{[0-9]+}}_{{[0-9]+}}) + +; MIPS64R5-N32: lw $[[R13:[0-9]+]], %got_page(.LCPI{{[0-9]+}}_{{[0-9]+}}) +; MIPS64R5-N32: addiu $[[R14:[0-9]+]], $[[R13]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) + +; MIPS64R5-N64: ld $[[R13:[0-9]+]], %got_page(.LCPI{{[0-9]+}}_{{[0-9]+}}) +; MIPS64R5-N64: daddiu $[[R14:[0-9]+]], $[[R13]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) + +; ALL: lui $[[R8:[0-9]+]], 32768 +; ALL: xor $[[R9:[0-9]+]], $[[R4]], $[[R8]] + +; MIPSR5: lh $[[R15:[0-9]+]], 0($[[R14]]) +; MIPSR5: fill.h $w[[W7:[0-9]+]], $[[R15]] +; MIPSR5: fexupr.w $w[[W8:[0-9]+]], $w[[W7]] +; MIPSR5: copy_s.w $[[R16:[0-9]+]], $w[[W8]][0] +; MIPSR5: mtc1 $[[R16]], $f[[F7:[0-9]+]] +; MIPSR5: c.olt.s $f[[F0]], $f[[F7]] +; MIPSR5: movt $[[R9]], $[[R7]], $fcc0 + +; MIPSR6: mfc1 $[[R10:[0-9]+]], $f[[F1]] +; MIPSR6: seleqz $[[R11:[0-9]]], $[[R9]], $[[R10]] +; MIPSR6: selnez $[[R12:[0-9]]], $[[R7]], $[[R10]] +; MIPSR6: or $2, $[[R12]], $[[R11]] + + ret i32 %1 +} + +define i32 @ffptosi() { +entry: +; ALL-LABEL: ffptosi: + %0 = load half, half * 
@h, align 2 + %1 = fptosi half %0 to i32 + ret i32 %1 + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: fexupr.d $w[[W2:[0-9]+]], $w[[W1]] + +; MIPS32: copy_s.w $[[R2:[0-9]+]], $w[[W2]][0] +; MIPS32: mtc1 $[[R2]], $f[[F0:[0-9]+]] +; MIPS32: copy_s.w $[[R3:[0-9]+]], $w[[W2]][1] +; MIPS32: mthc1 $[[R3]], $f[[F0]] + +; MIPS64: copy_s.d $[[R2:[0-9]+]], $w[[W2]][0] +; MIPS64: dmtc1 $[[R2]], $f[[F0:[0-9]+]] + +; ALL: trunc.w.d $f[[F1:[0-9]+]], $f[[F0]] +; ALL: mfc1 $2, $f[[F1]] +} + +define void @uitofp(i32 %a) { +entry: +; ALL-LABEL: uitofp: + +; MIPS32-O32: ldc1 $f[[F0:[0-9]+]], %lo($CPI{{[0-9]+}}_{{[0-9]+}}) +; MIPS32-O32: ldc1 $f[[F1:[0-9]+]], 0($sp) + +; MIPS64-N32: ldc1 $f[[F0:[0-9]+]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) +; MIPS64-N32: ldc1 $f[[F1:[0-9]+]], 8($sp) + +; MIPS64-N64: ldc1 $f[[F0:[0-9]+]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) +; MIPS64-N64: ldc1 $f[[F1:[0-9]+]], 8($sp) + +; MIPSR5: sub.d $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]] +; MIPSR6-O32: sub.d $f[[F2:[0-9]+]], $f[[F0]], $f[[F1]] +; MIPSR6-N32: sub.d $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]] +; MIPSR6-N64: sub.d $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]] + +; MIPS32: mfc1 $[[R0:[0-9]+]], $f[[F2]] +; MIPS32: fill.w $w[[W0:[0-9]+]], $[[R0]] +; MIPS32: mfhc1 $[[R1:[0-9]+]], $f[[F2]] +; MIPS32: insert.w $w[[W0]][1], $[[R1]] +; MIPS32: insert.w $w[[W0]][3], $[[R1]] + +; MIPS64-N64: ld $[[R3:[0-9]+]], %got_disp(h) +; MIPS64-N32: lw $[[R3:[0-9]+]], %got_disp(h) +; MIPS64: dmfc1 $[[R1:[0-9]+]], $f[[F2]] +; MIPS64: fill.d $w[[W0:[0-9]+]], $[[R1]] + +; ALL: fexdo.w $w[[W1:[0-9]+]], $w[[W0]], $w[[W0]] +; ALL: fexdo.h $w[[W2:[0-9]+]], $w[[W1]], $w[[W1]] + +; MIPS32: lw $[[R3:[0-9]+]], %got(h) + +; ALL: copy_u.h $[[R2:[0-9]+]], $w[[W2]] +; ALL: sh $[[R2]], 0($[[R3]]) + %0 = uitofp i32 %a to half + store half %0, half * @h, align 2 + ret void +} + + +; Check that f16 is expanded to f32 and relevant transfer ops occur. 
+; We don't check f16 -> f64 expansion occurs, as we expand f16 to f32. + +define void @fadd() { +entry: +; ALL-LABEL: fadd: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] + + %2 = load i16, i16* @g, align 2 + %3 = call float @llvm.convert.from.fp16.f32(i16 %2) + %add = fadd float %1, %3 + +; ALL: add.s $f[[F1:[0-9]+]], $f[[F0]], $f[[F0]] + + %4 = call i16 @llvm.convert.to.fp16.f32(float %add) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] +; ALL: sh $[[R3]] + store i16 %4, i16* @g, align 2 + ret void +} + +; Function Attrs: nounwind readnone +declare float @llvm.convert.from.fp16.f32(i16) + +; Function Attrs: nounwind readnone +declare i16 @llvm.convert.to.fp16.f32(float) + +; Function Attrs: nounwind +define void @fsub() { +entry: +; ALL-LABEL: fsub: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] + + %2 = load i16, i16* @g, align 2 + %3 = call float @llvm.convert.from.fp16.f32(i16 %2) + %sub = fsub float %1, %3 + +; ALL: sub.s $f[[F1:[0-9]+]], $f[[F0]], $f[[F0]] + + %4 = call i16 @llvm.convert.to.fp16.f32(float %sub) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %4, i16* @g, align 2 +; ALL: sh $[[R3]] + ret void +} + +define void @fmult() { +entry: +; ALL-LABEL: fmult: + %0 = load i16, i16* @g, align 2 + %1 = call float 
@llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] + + %2 = load i16, i16* @g, align 2 + %3 = call float @llvm.convert.from.fp16.f32(i16 %2) + %mul = fmul float %1, %3 + +; ALL: mul.s $f[[F1:[0-9]+]], $f[[F0]], $f[[F0]] + + %4 = call i16 @llvm.convert.to.fp16.f32(float %mul) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %4, i16* @g, align 2 + +; ALL: sh $[[R3]] + ret void +} + +define void @fdiv() { +entry: +; ALL-LABEL: fdiv: + + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] + + %2 = load i16, i16* @g, align 2 + %3 = call float @llvm.convert.from.fp16.f32(i16 %2) + %div = fdiv float %1, %3 + +; ALL: div.s $f[[F1:[0-9]+]], $f[[F0]], $f[[F0]] + + %4 = call i16 @llvm.convert.to.fp16.f32(float %div) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + store i16 %4, i16* @g, align 2 +; ALL: sh $[[R3]] + ret void +} + +define void @frem() { +entry: +; ALL-LABEL: frem: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] + + %2 = load i16, i16* @g, align 2 + %3 = call float @llvm.convert.from.fp16.f32(i16 %2) + %rem = frem float %1, %3 + +; MIPS32: lw $25, %call16(fmodf)($gp) 
+; MIPS64-N32: lw $25, %call16(fmodf)($gp) +; MIPS64-N64: ld $25, %call16(fmodf)($gp) +; ALL: jalr $25 + + %4 = call i16 @llvm.convert.to.fp16.f32(float %rem) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %4, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +@i1 = external global i16, align 1 + +define void @fcmp() { +entry: +; ALL-LABEL: fcmp: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] + + %2 = load i16, i16* @g, align 2 + %3 = call float @llvm.convert.from.fp16.f32(i16 %2) + %fcmp = fcmp oeq float %1, %3 + +; MIPSR5: addiu $[[R2:[0-9]+]], $zero, 1 +; MIPSR5: c.un.s $f[[F0]], $f[[F0]] +; MIPSR5: movt $[[R2]], $zero, $fcc0 +; MIPSR6: cmp.un.s $f[[F1:[0-9]+]], $f[[F0]], $f[[F0]] +; MIPSR6: mfc1 $[[R3:[0-9]]], $f[[F1]] +; MIPSR6: not $[[R4:[0-9]+]], $[[R3]] +; MIPSR6: andi $[[R2:[0-9]+]], $[[R4]], 1 + + %4 = zext i1 %fcmp to i16 + store i16 %4, i16* @i1, align 2 +; ALL: sh $[[R2]] + + ret void +} + +declare float @llvm.powi.f32(float, i32) + +define void @fpowi() { +entry: +; ALL-LABEL: fpowi: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] + + %powi = call float @llvm.powi.f32(float %1, i32 2) + +; ALL: mul.s $f[[F1:[0-9]+]], $f[[F0]], $f[[F0]] + + %2 = call i16 @llvm.convert.to.fp16.f32(float %powi) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h 
$[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + ret void +} + +define void @fpowi_var(i32 %var) { +entry: +; ALL-LABEL: fpowi_var: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] + + %powi = call float @llvm.powi.f32(float %1, i32 %var) + +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(__powisf2)($gp) +; MIPS64-N32-DAG: lw $25, %call16(__powisf2)($gp) +; MIPS64-N64-DAG: ld $25, %call16(__powisf2)($gp) +; ALL-DAG: jalr $25 + + %2 = call i16 @llvm.convert.to.fp16.f32(float %powi) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + ret void +} + +declare float @llvm.pow.f32(float %Val, float %power) + +define void @fpow(float %var) { +entry: +; ALL-LABEL: fpow: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] + + %powi = call float @llvm.pow.f32(float %1, float %var) + +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(powf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(powf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(powf)($gp) +; ALL-DAG: jalr $25 + + %2 = call i16 @llvm.convert.to.fp16.f32(float %powi) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + ret void +} + +declare float @llvm.log2.f32(float %Val) + +define void @flog2() { +entry: +; 
ALL-LABEL: flog2: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(log2f)($gp) +; MIPS64-N32-DAG: lw $25, %call16(log2f)($gp) +; MIPS64-N64-DAG: ld $25, %call16(log2f)($gp) +; ALL-DAG: jalr $25 + + %log2 = call float @llvm.log2.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %log2) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.log10.f32(float %Val) + +define void @flog10() { +entry: +; ALL-LABEL: flog10: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(log10f)($gp) +; MIPS64-N32-DAG: lw $25, %call16(log10f)($gp) +; MIPS64-N64-DAG: ld $25, %call16(log10f)($gp) +; ALL-DAG: jalr $25 + + %log10 = call float @llvm.log10.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %log10) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.sqrt.f32(float %Val) + +define void @fsqrt() { +entry: +; ALL-LABEL: fsqrt: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w 
$w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; ALL: sqrt.s $f[[F1:[0-9]+]], $f[[F0]] + + %sqrt = call float @llvm.sqrt.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %sqrt) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.sin.f32(float %Val) + +define void @fsin() { +entry: +; ALL-LABEL: fsin: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(sinf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(sinf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(sinf)($gp) +; ALL-DAG: jalr $25 + + %sin = call float @llvm.sin.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %sin) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.cos.f32(float %Val) + +define void @fcos() { +entry: +; ALL-LABEL: fcos: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(cosf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(cosf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(cosf)($gp) +; ALL-DAG: jalr $25 + + %cos = call float @llvm.cos.f32(float %1) + %2 = call i16 
@llvm.convert.to.fp16.f32(float %cos) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.exp.f32(float %Val) + +define void @fexp() { +entry: +; ALL-LABEL: fexp: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(expf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(expf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(expf)($gp) +; ALL-DAG: jalr $25 + + %exp = call float @llvm.exp.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %exp) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.exp2.f32(float %Val) + +define void @fexp2() { +entry: +; ALL-LABEL: fexp2: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(exp2f)($gp) +; MIPS64-N32-DAG: lw $25, %call16(exp2f)($gp) +; MIPS64-N64-DAG: ld $25, %call16(exp2f)($gp) +; ALL-DAG: jalr $25 + + %exp2 = call float @llvm.exp2.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %exp2) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store 
i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.fma.f32(float, float, float) + +define void @ffma(float %b, float %c) { +entry: +; ALL-LABEL: ffma: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(fmaf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(fmaf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(fmaf)($gp) +; ALL-DAG: jalr $25 + + %fma = call float @llvm.fma.f32(float %1, float %b, float %c) + %2 = call i16 @llvm.convert.to.fp16.f32(float %fma) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +; FIXME: For MIPSR6, this should produce the maddf.s instruction. MIPSR5 cannot +; fuse the operation such that the intermediate result is not rounded.
+ +declare float @llvm.fmuladd.f32(float, float, float) + +define void @ffmuladd(float %b, float %c) { +entry: +; ALL-LABEL: ffmuladd: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-O32: madd.s $f[[F1:[0-9]+]], $f14, $f[[F0]], $f12 +; MIPS32-N32: madd.s $f[[F1:[0-9]+]], $f13, $f[[F0]], $f12 +; MIPS32-N64: madd.s $f[[F1:[0-9]+]], $f13, $f[[F0]], $f12 +; MIPSR6: mul.s $f[[F2:[0-9]+]], $f[[F0]], $f12 +; MIPSR6-O32: add.s $f[[F1:[0-9]+]], $f[[F2]], $f14 +; MIPSR6-N32: add.s $f[[F1:[0-9]+]], $f[[F2]], $f13 +; MIPSR6-N64: add.s $f[[F1:[0-9]+]], $f[[F2]], $f13 + + %fmuladd = call float @llvm.fmuladd.f32(float %1, float %b, float %c) + %2 = call i16 @llvm.convert.to.fp16.f32(float %fmuladd) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.fabs.f32(float %Val) + +define void @ffabs() { +entry: +; ALL-LABEL: ffabs: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; ALL: abs.s $f[[F1:[0-9]+]], $f[[F0]] + + %fabs = call float @llvm.fabs.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %fabs) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 + +; ALL: sh $[[R3]] + ret void +} + +declare float @llvm.minnum.f32(float %Val,
float %b) + +define void @fminnum(float %b) { +entry: +; ALL-LABEL: fminnum: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(fminf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(fminf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(fminf)($gp) +; ALL-DAG: jalr $25 + + %minnum = call float @llvm.minnum.f32(float %1, float %b) + %2 = call i16 @llvm.convert.to.fp16.f32(float %minnum) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.maxnum.f32(float %Val, float %b) + +define void @fmaxnum(float %b) { +entry: +; ALL-LABEL: fmaxnum: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(fmaxf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(fmaxf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(fmaxf)($gp) +; ALL-DAG: jalr $25 + + %maxnum = call float @llvm.maxnum.f32(float %1, float %b) + %2 = call i16 @llvm.convert.to.fp16.f32(float %maxnum) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +; This expansion of fcopysign could be done without converting f16 to float. 
+ +declare float @llvm.copysign.f32(float %Val, float %b) + +define void @fcopysign(float %b) { +entry: +; ALL-LABEL: fcopysign: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] + + %copysign = call float @llvm.copysign.f32(float %1, float %b) + %2 = call i16 @llvm.convert.to.fp16.f32(float %copysign) + +; ALL: mfc1 $[[R2:[0-9]+]], $f12 +; ALL: ext $[[R3:[0-9]+]], $[[R2]], 31, 1 +; ALL: ins $[[R1]], $[[R3]], 31, 1 +; ALL: fill.w $w[[W2:[0-9]+]], $[[R1]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.floor.f32(float %Val) + +define void @ffloor() { +entry: +; ALL-LABEL: ffloor: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(floorf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(floorf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(floorf)($gp) +; ALL-DAG: jalr $25 + + %floor = call float @llvm.floor.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %floor) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.ceil.f32(float %Val) + +define void @fceil() { +entry: +; ALL-LABEL: fceil: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w
$w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(ceilf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(ceilf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(ceilf)($gp) +; ALL-DAG: jalr $25 + + %ceil = call float @llvm.ceil.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %ceil) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.trunc.f32(float %Val) + +define void @ftrunc() { +entry: +; ALL-LABEL: ftrunc: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(truncf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(truncf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(truncf)($gp) +; ALL-DAG: jalr $25 + + %trunc = call float @llvm.trunc.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %trunc) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.rint.f32(float %Val) + +define void @frint() { +entry: +; ALL-LABEL: frint: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(rintf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(rintf)($gp) 
+; MIPS64-N64-DAG: ld $25, %call16(rintf)($gp) +; ALL-DAG: jalr $25 + %rint = call float @llvm.rint.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %rint) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + store i16 %2, i16* @g, align 2 + +; ALL: sh $[[R3]] + ret void +} + +declare float @llvm.nearbyint.f32(float %Val) + +define void @fnearbyint() { +entry: +; ALL-LABEL: fnearbyint: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(nearbyintf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(nearbyintf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(nearbyintf)($gp) +; ALL-DAG: jalr $25 + + %nearbyint = call float @llvm.nearbyint.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float %nearbyint) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} + +declare float @llvm.round.f32(float %Val) + +define void @fround() { +entry: +; ALL-LABEL: fround: + %0 = load i16, i16* @g, align 2 + %1 = call float @llvm.convert.from.fp16.f32(i16 %0) + +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] +; ALL-DAG: mtc1 $[[R1]], $f[[F0:[0-9]+]] +; MIPS32-DAG: lw $25, %call16(roundf)($gp) +; MIPS64-N32-DAG: lw $25, %call16(roundf)($gp) +; MIPS64-N64-DAG: ld $25, %call16(roundf)($gp) +; ALL-DAG: jalr $25 + + %round = call float @llvm.round.f32(float %1) + %2 = call i16 @llvm.convert.to.fp16.f32(float 
%round) + +; ALL: mfc1 $[[R2:[0-9]+]], $f[[F1]] +; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] +; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] +; ALL: copy_u.h $[[R3:[0-9]+]], $w[[W3]][0] + + store i16 %2, i16* @g, align 2 +; ALL: sh $[[R3]] + + ret void +} Index: test/CodeGen/Mips/msa/fexuprl.ll =================================================================== --- /dev/null +++ test/CodeGen/Mips/msa/fexuprl.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -march=mips -mcpu=mips32r5 -mattr=+fp64,+msa | FileCheck %s + +; Test that fexup[rl].w don't crash LLVM during type legalization. + +@g = local_unnamed_addr global <8 x half> zeroinitializer, align 16 +@i = local_unnamed_addr global <4 x float> zeroinitializer, align 16 +@j = local_unnamed_addr global <4 x float> zeroinitializer, align 16 + +define i32 @test() local_unnamed_addr { +entry: + %0 = load <8 x half>, <8 x half>* @g, align 16 + %1 = tail call <4 x float> @llvm.mips.fexupl.w(<8 x half> %0) + store <4 x float> %1, <4 x float>* @i, align 16 +; CHECK: ld.h $w[[W0:[0-9]+]], 0(${{[0-9]+}}) +; CHECK: fexupl.w $w[[W1:[0-9]+]], $w[[W0]] +; CHECK: st.w $w[[W1]], 0(${{[0-9]+}}) + + %2 = tail call <4 x float> @llvm.mips.fexupr.w(<8 x half> %0) + store <4 x float> %2, <4 x float>* @j, align 16 + +; CHECK: fexupr.w $w[[W2:[0-9]+]], $w[[W0]] +; CHECK: st.w $w[[W2]], 0(${{[0-9]+}}) + + ret i32 0 +} + +declare <4 x float> @llvm.mips.fexupl.w(<8 x half>) +declare <4 x float> @llvm.mips.fexupr.w(<8 x half>)