Index: lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsISelLowering.cpp +++ lib/Target/Mips/MipsISelLowering.cpp @@ -343,6 +343,7 @@ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal); } if (!Subtarget.isGP64bit()) { @@ -385,10 +386,6 @@ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (Subtarget.hasCnMips()) { setOperationAction(ISD::CTPOP, MVT::i32, Legal); Index: lib/Target/Mips/MipsInstrFPU.td =================================================================== --- lib/Target/Mips/MipsInstrFPU.td +++ lib/Target/Mips/MipsInstrFPU.td @@ -247,6 +247,17 @@ let hasFCCRegOperand = 1; } +class UIntToFp_FT : + PseudoSE<(outs OutRC:$dst), (ins InRC:$src), + [(set OutRC:$dst, (uint_to_fp InRC:$src))]> { + let usesCustomInserter = 1; +} + +class FpToUInt_FT : + PseudoSE<(outs OutRC:$dst), (ins InRC:$src), + [(set OutRC:$dst, (fp_to_uint InRC:$src))]> { + let usesCustomInserter = 1; +} multiclass C_COND_M fmt, InstrItinClass itin> { @@ -420,6 +431,19 @@ def PseudoCVT_D64_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, II_CVT>; } +def UInt32ToFp32Pseudo_32 : UIntToFp_FT, FGR_32; +def UInt32ToFp32Pseudo_64 : UIntToFp_FT, FGR_64; +def UInt32ToFp64Pseudo_32 : UIntToFp_FT, FGR_32; +def UInt32ToFp64Pseudo_64 : UIntToFp_FT, FGR_64; +def UInt64ToFp64Pseudo_64 : UIntToFp_FT, FGR_64; + +def Fp32ToUInt32Pseudo_32 : FpToUInt_FT, FGR_32; +def 
Fp32ToUInt32Pseudo_64 : FpToUInt_FT, FGR_64; +def Fp32ToUInt64Pseudo_64 : FpToUInt_FT, FGR_64; +def Fp64ToUInt32Pseudo_32 : FpToUInt_FT, FGR_32; +def Fp64ToUInt32Pseudo_64 : FpToUInt_FT, FGR_64; +def Fp64ToUInt64Pseudo_64 : FpToUInt_FT, FGR_64; + def FABS_S : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>, ABSS_FM<0x5, 16>; def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>, Index: lib/Target/Mips/MipsMSAInstrInfo.td =================================================================== --- lib/Target/Mips/MipsMSAInstrInfo.td +++ lib/Target/Mips/MipsMSAInstrInfo.td @@ -3772,6 +3772,18 @@ let usesCustomInserter = 1; } + def MSA_UINT_TO_FP : MipsPseudo<(outs MSA128F16:$wd), (ins GPR32Opnd:$rs), + [(set MSA128F16:$wd, + (f16 (uint_to_fp GPR32Opnd:$rs)))]> { + let usesCustomInserter = 1; + } + + def MSA_FP_TO_UINT : MipsPseudo<(outs GPR32Opnd:$rd), (ins MSA128F16:$ws), + [(set GPR32Opnd:$rd, + (i32 (fp_to_uint MSA128F16:$ws)))]> { + let usesCustomInserter = 1; + } + def : MipsPat<(MipsTruncIntFP MSA128F16:$ws), (TRUNC_W_D64 (MSA_FP_EXTEND_D_PSEUDO MSA128F16:$ws))>; Index: lib/Target/Mips/MipsSEISelLowering.h =================================================================== --- lib/Target/Mips/MipsSEISelLowering.h +++ lib/Target/Mips/MipsSEISelLowering.h @@ -125,6 +125,20 @@ MachineBasicBlock *emitFPROUND_PSEUDO(MachineInstr &MI, MachineBasicBlock *BBi, bool IsFGR64) const; + + MachineBasicBlock *emitMSA_UINT_TO_FP(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitMSA_FP_TO_UINT(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitUINT_TO_FP(MachineInstr &MI, MachineBasicBlock *BB, + unsigned CvtOp, unsigned FaddOp, + bool isFP64) const; + + MachineBasicBlock *emitFP_TO_UINT(MachineInstr &MI, MachineBasicBlock *BB, + unsigned TruncOp, unsigned FSubOp, + bool isFP64) const; }; } Index: lib/Target/Mips/MipsSEISelLowering.cpp 
=================================================================== --- lib/Target/Mips/MipsSEISelLowering.cpp +++ lib/Target/Mips/MipsSEISelLowering.cpp @@ -1049,6 +1049,32 @@ return emitFPEXTEND_PSEUDO(MI, BB, true); case Mips::MSA_FP_ROUND_D_PSEUDO: return emitFPROUND_PSEUDO(MI, BB, true); + case Mips::MSA_UINT_TO_FP: + return emitMSA_UINT_TO_FP(MI, BB); + case Mips::MSA_FP_TO_UINT: + return emitMSA_FP_TO_UINT(MI, BB); + case Mips::UInt32ToFp32Pseudo_32: + return emitUINT_TO_FP(MI, BB, Mips::CVT_D32_W, Mips::FADD_D32, false); + case Mips::UInt32ToFp32Pseudo_64: + return emitUINT_TO_FP(MI, BB, Mips::CVT_S_L, 0, true); + case Mips::UInt32ToFp64Pseudo_32: + return emitUINT_TO_FP(MI, BB, Mips::CVT_D32_W, Mips::FADD_D32, false); + case Mips::UInt32ToFp64Pseudo_64: + return emitUINT_TO_FP(MI, BB, Mips::CVT_D64_L, Mips::FADD_D64, true); + case Mips::UInt64ToFp64Pseudo_64: + return emitUINT_TO_FP(MI, BB, Mips::CVT_D64_L, Mips::FADD_D64, true); + case Mips::Fp32ToUInt32Pseudo_32: + return emitFP_TO_UINT(MI, BB, Mips::TRUNC_W_S, Mips::FSUB_S, false); + case Mips::Fp32ToUInt32Pseudo_64: + return emitFP_TO_UINT(MI, BB, Mips::TRUNC_W_S, Mips::FSUB_S, true); + case Mips::Fp32ToUInt64Pseudo_64: + return emitFP_TO_UINT(MI, BB, Mips::TRUNC_L_S, Mips::FSUB_S, true); + case Mips::Fp64ToUInt32Pseudo_32: + return emitFP_TO_UINT(MI, BB, Mips::TRUNC_W_D32, Mips::FSUB_D32, false); + case Mips::Fp64ToUInt32Pseudo_64: + return emitFP_TO_UINT(MI, BB, Mips::TRUNC_W_D64, Mips::FSUB_D64, true); + case Mips::Fp64ToUInt64Pseudo_64: + return emitFP_TO_UINT(MI, BB, Mips::TRUNC_L_D64, Mips::FSUB_D64, true); } } @@ -3687,6 +3713,517 @@ return BB; } +// Emit the MSA_UINT_TO_FP pseudo instruction. 
+// +// fill.w $wtemp, $rs +// ffint_u.w $wtemp2, $wtemp +// fexdo.h $wd, $wtemp2, $wtemp2 +// +MachineBasicBlock * +MipsSETargetLowering::emitMSA_UINT_TO_FP(MachineInstr &MI, + MachineBasicBlock *BB) const { + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetRegisterClass *RC = &Mips::MSA128WRegClass; + DebugLoc DL = MI.getDebugLoc(); + unsigned Wd1 = RegInfo.createVirtualRegister(RC); + unsigned Wd2 = RegInfo.createVirtualRegister(RC); + BuildMI(*BB, MI, DL, TII->get(Mips::FILL_W), Wd1) + .addReg(MI.getOperand(1).getReg()); + BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Wd2).addReg(Wd1); + BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), MI.getOperand(0).getReg()) + .addReg(Wd2) + .addReg(Wd2); + + MI.eraseFromParent(); + return BB; +} + +// Emit the MSA_FP_TO_UINT pseudo instruction. +// +// fexupr.w $wtemp, $ws +// ftint_u.w $wtemp2, $wtemp +// copy_u.w $rd, $wtemp2[0] +// +MachineBasicBlock * +MipsSETargetLowering::emitMSA_FP_TO_UINT(MachineInstr &MI, + MachineBasicBlock *BB) const { + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetRegisterClass *RC = &Mips::MSA128WRegClass; + DebugLoc DL = MI.getDebugLoc(); + unsigned Wd1 = RegInfo.createVirtualRegister(RC); + unsigned Wd2 = RegInfo.createVirtualRegister(RC); + BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wd1) + .addReg(MI.getOperand(1).getReg()); + BuildMI(*BB, MI, DL, TII->get(Mips::FTINT_U_W), Wd2).addReg(Wd1); + BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_W), MI.getOperand(0).getReg()) + .addReg(Wd2) + .addImm(0); + + MI.eraseFromParent(); + return BB; +} + +// Emit the UIntToFpPseudo pseudo instruction. 
+// +// UINT_TO_FP GPR32Opnd:$rs, FGR32Opnd:$fd +// For Mips32r2: +// => +// mtc1 $rs, $ft +// cvt.d.w $ft1, $ft +// bgez $rs, $BB0_2 +// nop +// lui $rt, 16880 +// mtc1 $zero, $ft2 +// mthc1 $rt, $ft2 +// add.d $ft1, $ft1, $ft2 +// $BB0_2: +// cvt.s.d $fd, $ft1 +// +// For Mips32r2 with 64-bit FPU: +// => +// mtc1 $rs, $ft +// mthc1 $zero, $ft +// cvt.s.l $fd, $ft +// +// For Mips64r2 (pre-r2 MIPS64 uses dsll/dsrl by 32 instead of dext): +// => +// dext $rt, $rs, 0, 32 +// dmtc1 $rt, $ft +// cvt.s.l $fd, $ft +// +// UINT_TO_FP GPR32Opnd:$rs, FGR64Opnd:$fd +// For Mips32r2: +// => +// mtc1 $rs, $ft +// cvt.d.w $ft, $ft +// bgez $rs, $BB0_2 +// nop +// lui $rt, 16880 +// mtc1 $zero, $ft1 +// mthc1 $rt, $ft1 +// add.d $fd, $ft, $ft1 +// $BB0_2: +// +// For Mips32r2 with 64-bit FPU: +// => +// mtc1 $rs, $ft +// mthc1 $zero, $ft +// cvt.d.l $fd, $ft +// +// For Mips64r2 (pre-r2 MIPS64 uses dsll/dsrl by 32 instead of dext): +// => +// dext $rt, $rs, 0, 32 +// dmtc1 $rt, $ft +// cvt.d.l $fd, $ft +// +// UINT_TO_FP GPR64Opnd:$rs, FGR64Opnd:$fd +// For Mips32: Lowered to libcall +// For Mips32 with 64-bit FPU: Lowered by custom hook +// For Mips64: +// => +// dmtc1 $rs, $ft +// cvt.d.l $ft1, $ft +// bgez $rs, .LBB0_2 +// nop +// lui $rt, 17392 +// dsll $rt, $rt, 32 +// dmtc1 $rt, $ft2 +// add.d $fd, $ft1, $ft2 +// .LBB0_2 +// +MachineBasicBlock *MipsSETargetLowering::emitUINT_TO_FP(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned CvtOp, + unsigned FAddOp, + bool IsFP64) const { + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + + const bool IsFGR64onMips64 = Subtarget.hasMips3() && IsFP64; + const bool IsFGR64onMips32 = !Subtarget.hasMips3() && IsFP64; + + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Src = MI.getOperand(1).getReg(); + + const TargetRegisterClass *FPDestClass = RegInfo.getRegClass(Dest); + const TargetRegisterClass *GPSrcClass = RegInfo.getRegClass(Src); + + const bool IsSrc64 = GPSrcClass == 
&Mips::GPR64RegClass; + const bool IsDest64 = FPDestClass != &Mips::FGR32RegClass; + + const TargetRegisterClass *FPTempRegClass = + !IsFP64 ? &Mips::AFGR64RegClass : &Mips::FGR64RegClass; + if (IsFGR64onMips64 && !IsSrc64) { + unsigned GPTemp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); + unsigned FPSrc = RegInfo.createVirtualRegister(&Mips::FGR64RegClass); + unsigned GPImpDef = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); + unsigned GPRRes = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); + BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), GPImpDef); + BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), GPRRes) + .addReg(GPImpDef) + .addReg(Src, RegState::Kill) + .addImm(Mips::sub_32); + if (Subtarget.hasMips64r2()) { + BuildMI(*BB, MI, DL, TII->get(Mips::DEXT), GPTemp) + .addReg(GPRRes) + .addImm(0) + .addImm(32); + } else { + unsigned GPTemp1 = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); + BuildMI(*BB, MI, DL, TII->get(Mips::DSLL), GPTemp1) + .addReg(GPRRes) + .addImm(32); + BuildMI(*BB, MI, DL, TII->get(Mips::DSRL), GPTemp) + .addReg(GPTemp1) + .addImm(32); + } + BuildMI(*BB, MI, DL, TII->get(Mips::DMTC1), FPSrc).addReg(GPTemp); + BuildMI(*BB, MI, DL, TII->get(CvtOp), Dest).addReg(FPSrc); + MI.eraseFromParent(); + return BB; + } else if (IsFGR64onMips32 && !IsSrc64) { + unsigned FPTemp = RegInfo.createVirtualRegister(&Mips::FGR64RegClass); + BuildMI(*BB, MI, DL, TII->get(Mips::BuildPairF64_64), FPTemp) + .addReg(Src, RegState::Kill) + .addReg(Mips::ZERO); + BuildMI(*BB, MI, DL, TII->get(CvtOp), Dest).addReg(FPTemp); + MI.eraseFromParent(); + return BB; + } + + // Transfer the remainder of BB and its successor edges to exitMBB. 
+ MachineFunction *MF = BB->getParent(); + MachineBasicBlock *newMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + + MachineFunction::iterator It = ++BB->getIterator(); + MF->insert(It, newMBB); + MF->insert(It, exitMBB); + + exitMBB->splice(exitMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(newMBB); + BB->addSuccessor(exitMBB); + newMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + + unsigned FPSrc = RegInfo.createVirtualRegister(!IsSrc64 ? &Mips::FGR32RegClass + : FPTempRegClass); + unsigned FPCvtResult = RegInfo.createVirtualRegister(FPTempRegClass); + unsigned FPAddValue = RegInfo.createVirtualRegister(FPTempRegClass); + unsigned FPAddResult = RegInfo.createVirtualRegister(FPTempRegClass); + unsigned FPDest = RegInfo.createVirtualRegister(FPTempRegClass); + unsigned GPAddValue = RegInfo.createVirtualRegister(GPSrcClass); + + const uint64_t AddValue = IsSrc64 ? 0x43F0 : 0x41F0; + + if (Subtarget.hasMips3() && IsSrc64) + BuildMI(BB, DL, TII->get(Mips::DMTC1), FPSrc).addReg(Src); + else + BuildMI(BB, DL, TII->get(Mips::MTC1), FPSrc).addReg(Src); + + BuildMI(BB, DL, TII->get(CvtOp), FPCvtResult).addReg(FPSrc); + BuildMI(BB, DL, TII->get(IsSrc64 ? Mips::BGEZ64 : Mips::BGEZ)) + .addReg(Src, RegState::Kill) + .addMBB(exitMBB); + + BuildMI(newMBB, DL, TII->get(IsSrc64 ? 
Mips::LUi64 : Mips::LUi), GPAddValue) + .addImm(AddValue); + + if (!IsFP64) + BuildMI(newMBB, DL, TII->get(Mips::BuildPairF64), FPAddValue) + .addReg(Mips::ZERO) + .addReg(GPAddValue); + else if (IsFGR64onMips32) + BuildMI(newMBB, DL, TII->get(Mips::BuildPairF64_64), FPAddValue) + .addReg(Mips::ZERO) + .addReg(GPAddValue); + else if (IsFGR64onMips64) { + unsigned GPTemp = RegInfo.createVirtualRegister(GPSrcClass); + BuildMI(newMBB, DL, TII->get(Mips::DSLL), GPTemp) + .addReg(GPAddValue) + .addImm(32); + BuildMI(newMBB, DL, TII->get(Mips::DMTC1), FPAddValue).addReg(GPTemp); + } + + MachineBasicBlock::iterator exitMBBI = exitMBB->begin(); + BuildMI(newMBB, DL, TII->get(FAddOp), FPAddResult) + .addReg(FPCvtResult) + .addReg(FPAddValue); + BuildMI(*exitMBB, exitMBBI, DL, TII->get(Mips::PHI), FPDest) + .addReg(FPCvtResult) + .addMBB(BB) + .addReg(FPAddResult) + .addMBB(newMBB); + if (!IsSrc64 && !IsDest64 && !IsFP64) { + BuildMI(*exitMBB, exitMBBI, DL, TII->get(Mips::CVT_S_D32), Dest) + .addReg(FPDest); + } else { + BuildMI(*exitMBB, exitMBBI, DL, TII->get(Mips::COPY), Dest) + .addReg(FPDest); + } + + MI.eraseFromParent(); + return exitMBB; +} + +// Emit the FpToUIntPseudo pseudo instruction. +// +// FP_TO_UINT FGR32Opnd:$fs, GPR32Opnd:$rd +// => +// lui $rt1, 0x4F00 +// mtc1 $ft1, $rt1 +// c.le.s $fcc0, $ft1, $fs +// bc1t $fcc0,$L2 +// nop +// trunc.w.s $f0,$f0 +// mfc1 $2,$f0 +// b $L3 +// nop +// $L2: +// sub.s $f0,$f0,$f1 +// li $3,-2147483648 +// trunc.w.s $f0,$f0 +// mfc1 $2,$f0 +// or $2,$2,$3 +// $L3: + +// +// FP_TO_UINT FGR64Opnd:$fs, GPR32Opnd:$rd +// For Mips32: Lowered to libcall +// For Mips32r2 with FP64i: +// TODO: Provide a combine that replaces fp_to_uint with the correct pseudo +// that will generate the correct sequence. 
+// +// For Mips64r2: +// => +// c.le.d $fcc0,$f1,$f0 +// bc1t $fcc0,.L2 +// nop +// trunc.l.d $f0,$f0 +// dmfc1 $3,$f0 +// b .L3 +// nop +// .L2: +// sub.d $f0,$f0,$f1 +// lui $2,0x8000 +// trunc.l.d $f0,$f0 +// dmfc1 $3,$f0 +// or $3,$3,$2 +// .L3: +// +// FP_TO_UINT FGR64Opnd:$rs, GPR64Opnd:$fd +// For Mips32: Lowered to libcall +// For Mips32 with 64-bit FPU: +// => +// c.le.d $fcc0,$f1,$f0 +// bc1t $fcc0,$L2 +// nop +// trunc.l.d $f0,$f0 +// mfc1 $2,$f0 +// mfhc1 $3,$f0 +// b $L3 +// nop +// $L2: +// lw $2,%got($LC0)($28) +// ldc1 $f1,%lo($LC0)($2) +// sub.d $f0,$f0,$f1 +// trunc.l.d $f0,$f0 +// mfc1 $2,$f0 +// mfhc1 $3,$f0 +// xori $4,$2,0 +// li $6,-2147483648 +// xor $5,$3,$6 +// move $2,$4 +// move $3,$5 +// $L3: +// +// For Mips64r2: +// => +// c.le.d $fcc0,$f1,$f0 +// bc1t $fcc0,.L2 +// nop +// trunc.l.d $f0,$f0 +// dmfc1 $3,$f0 +// b .L3 +// nop +// .L2: +// sub.d $f0,$f0,$f1 +// lui $2,0x8000 +// dsll $2,$2,32 +// trunc.l.d $f0,$f0 +// dmfc1 $3,$f0 +// or $3,$3,$2 +// .L3: +// +MachineBasicBlock *MipsSETargetLowering::emitFP_TO_UINT(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned TruncOp, + unsigned FSubOp, + bool IsFP64) const { + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + + const bool IsFGR64onMips64 = Subtarget.hasMips3() && IsFP64; + const bool IsFGR64onMips32 = !Subtarget.hasMips3() && IsFP64; + + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Src = MI.getOperand(1).getReg(); + + const TargetRegisterClass *GPDestClass = RegInfo.getRegClass(Dest); + const TargetRegisterClass *FPSrcClass = RegInfo.getRegClass(Src); + + const bool IsSrc64 = FPSrcClass != &Mips::FGR32RegClass; + const bool IsDest64 = GPDestClass == &Mips::GPR64RegClass; + + // Transfer the remainder of BB and its successor edges to exitMBB. 
+ MachineFunction *MF = BB->getParent(); + MachineBasicBlock *truncateMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *correctionMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + + MachineFunction::iterator It = ++BB->getIterator(); + MF->insert(It, truncateMBB); + MF->insert(It, correctionMBB); + MF->insert(It, exitMBB); + + exitMBB->splice(exitMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(truncateMBB); + BB->addSuccessor(correctionMBB); + correctionMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + truncateMBB->addSuccessor(exitMBB, BranchProbability::getOne()); + unsigned GPSubValue = RegInfo.createVirtualRegister( + (IsSrc64 && IsFGR64onMips64) ? &Mips::GPR64RegClass + : &Mips::GPR32RegClass); + unsigned FPSubValue = RegInfo.createVirtualRegister( + IsSrc64 ? (IsFP64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass) + : &Mips::FGR32RegClass); + unsigned TruncatedValue1 = RegInfo.createVirtualRegister( + IsDest64 ? &Mips::FGR64RegClass : &Mips::FGR32RegClass); + unsigned TruncatedValue2 = RegInfo.createVirtualRegister( + IsDest64 ? &Mips::FGR64RegClass : &Mips::FGR32RegClass); + unsigned TempResult1 = RegInfo.createVirtualRegister( + IsDest64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass); + unsigned TempResult2 = RegInfo.createVirtualRegister( + IsDest64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass); + unsigned SubtractedValue = RegInfo.createVirtualRegister( + IsSrc64 ? (IsFP64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass) + : &Mips::FGR32RegClass); + unsigned TruncatedValueGP = RegInfo.createVirtualRegister( + IsDest64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass); + unsigned MSBBit = RegInfo.createVirtualRegister( + IsDest64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass); + + const uint64_t SubValue = IsSrc64 ? (IsDest64 ? 0x43E0 : 0x41E0) : (IsDest64 ? 
0x5F00 : 0x4F00); + + BuildMI(BB, DL, + TII->get((IsSrc64 && IsFGR64onMips64) ? Mips::LUi64 : Mips::LUi), + GPSubValue) + .addImm(SubValue); + + if (IsSrc64) { + if (!IsFP64) + BuildMI(BB, DL, TII->get(Mips::BuildPairF64), FPSubValue) + .addReg(Mips::ZERO) + .addReg(GPSubValue); + else if (IsFGR64onMips32) + BuildMI(BB, DL, TII->get(Mips::BuildPairF64_64), FPSubValue) + .addReg(Mips::ZERO) + .addReg(GPSubValue); + else { + unsigned GPTemp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); + BuildMI(BB, DL, TII->get(Mips::DSLL32), GPTemp) + .addReg(GPSubValue) + .addImm(0); + BuildMI(BB, DL, TII->get(Mips::DMTC1), FPSubValue).addReg(GPTemp); + } + } else + BuildMI(BB, DL, TII->get(Mips::MTC1), FPSubValue).addReg(GPSubValue); + BuildMI(BB, DL, + TII->get(IsSrc64 ? (IsFP64 ? Mips::C_LE_D64 : Mips::C_LE_D32) + : Mips::C_LE_S), + Mips::FCC0) + .addReg(FPSubValue) + .addReg(Src); + BuildMI(BB, DL, TII->get(Mips::BC1T)) + .addReg(Mips::FCC0) + .addMBB(correctionMBB); + + BuildMI(truncateMBB, DL, TII->get(TruncOp), TruncatedValue1) + .addReg(Src, RegState::Kill); + + if (IsDest64) { + if (IsFGR64onMips32) + BuildMI(truncateMBB, DL, TII->get(Mips::ExtractElementF64_64), + TempResult1) + .addReg(TruncatedValue1); + else if (IsFGR64onMips64) + BuildMI(truncateMBB, DL, TII->get(Mips::DMFC1), TempResult1) + .addReg(TruncatedValue1); + } else { + BuildMI(truncateMBB, DL, TII->get(Mips::MFC1), TempResult1) + .addReg(TruncatedValue1); + } + + BuildMI(truncateMBB, DL, TII->get(Mips::B)).addMBB(exitMBB); + + BuildMI(correctionMBB, DL, TII->get(FSubOp), SubtractedValue) + .addReg(Src, RegState::Kill) + .addReg(FPSubValue); + BuildMI(correctionMBB, DL, TII->get(TruncOp), TruncatedValue2) + .addReg(SubtractedValue); + + if (IsDest64) { + unsigned GPTemp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); + if (IsFGR64onMips32) + BuildMI(correctionMBB, DL, TII->get(Mips::ExtractElementF64_64), + TruncatedValueGP) + .addReg(TruncatedValue2); + else + BuildMI(correctionMBB, DL, 
TII->get(Mips::DMFC1), TruncatedValueGP) + .addReg(TruncatedValue2); + // FIXME: The delay slot filler fails to schedule LUi(64) into the delay + // slot of the BC1T. + BuildMI(correctionMBB, DL, TII->get(Mips::LUi64), MSBBit).addImm(0x8000); + BuildMI(correctionMBB, DL, TII->get(Mips::DSLL), GPTemp) + .addReg(MSBBit) + .addImm(32); + BuildMI(correctionMBB, DL, TII->get(Mips::OR64), TempResult2) + .addReg(TruncatedValueGP) + .addReg(GPTemp); + } else { + BuildMI(correctionMBB, DL, TII->get(Mips::MFC1), TruncatedValueGP) + .addReg(TruncatedValue2); + BuildMI(correctionMBB, DL, TII->get(Mips::LUi), MSBBit).addImm(0x8000); + BuildMI(correctionMBB, DL, TII->get(Mips::OR), TempResult2) + .addReg(TruncatedValueGP) + .addReg(MSBBit); + } + + BuildMI(*exitMBB, exitMBB->begin(), DL, TII->get(Mips::PHI), Dest) + .addReg(TempResult1) + .addMBB(truncateMBB) + .addReg(TempResult2) + .addMBB(correctionMBB); + + MI.eraseFromParent(); + return exitMBB; +} + // Emit the FEXP2_W_1 pseudo instructions. // // fexp2_w_1_pseudo $wd, $wt Index: test/CodeGen/Mips/2008-07-07-Float2Int.ll =================================================================== --- test/CodeGen/Mips/2008-07-07-Float2Int.ll +++ test/CodeGen/Mips/2008-07-07-Float2Int.ll @@ -2,6 +2,7 @@ define i32 @fptoint(float %a) nounwind { entry: +; CHECK-LABEL: fptoint ; CHECK: trunc.w.s fptosi float %a to i32 ; :0 [#uses=1] ret i32 %0 @@ -9,8 +10,7 @@ define i32 @fptouint(float %a) nounwind { entry: -; CHECK: fptouint -; CHECK: trunc.w.s +; CHECK-LABEL: fptouint ; CHECK: trunc.w.s fptoui float %a to i32 ; :0 [#uses=1] ret i32 %0 Index: test/CodeGen/Mips/cconv/vector.ll =================================================================== --- test/CodeGen/Mips/cconv/vector.ll +++ test/CodeGen/Mips/cconv/vector.ll @@ -1443,7 +1443,7 @@ ; MIPS32-DAG: mtc1 $5, $f{{[0-9]+}} ; MIPS32: andi $[[R7:[0-9]+]], $6, 255 ; MIPS32: mtc1 $[[R7]], $f[[F0:[0-9]+]] -; MIPS32: cvt.s.w $f{{[0-9]+}}, $f[[F0]] +; MIPS32: cvt.d.w $f{{[0-9]+}}, $f[[F0]] 
; MIPS32-DAG: mtc1 $4, $f{{[0-9]+}} ; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 16($sp) @@ -1461,7 +1461,7 @@ ; MIPS64EB: sll $[[R6:[0-9]+]], $5, 0 ; MIPS64EB: andi $[[R7:[0-9]+]], $[[R6]], 255 ; MIPS64EB: mtc1 $[[R7]], $f[[F0:[0-9]+]] -; MIPS64EB: cvt.s.w $f{{[0-9]+}}, $f[[F0]] +; MIPS64EB: cvt.s.l $f{{[0-9]+}}, $f[[F0]] ; MIPS64EB-DAG: dsrl $[[R1:[0-9]+]], $4, 32 ; MIPS64EB-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0 @@ -1480,7 +1480,7 @@ ; MIPS64EL: sll $[[R6:[0-9]+]], $5, 0 ; MIPS64EL: andi $[[R7:[0-9]+]], $[[R6]], 255 ; MIPS64EL: mtc1 $[[R7]], $f[[F0:[0-9]+]] -; MIPS64EL: cvt.s.w $f{{[0-9]+}}, $f[[F0]] +; MIPS64EL: cvt.s.l $f{{[0-9]+}}, $f[[F0]] ; MIPS64EL-DAG: dsrl $[[R4:[0-9]+]], $6, 32 ; MIPS64EL-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0 @@ -1514,8 +1514,8 @@ ; MIPS32-DAG: mtc1 $6, $f{{[0-9]+}} ; MIPS32-DAG: mtc1 $7, $f{{[0-9]+}} -; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 28($sp) -; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 24($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 20($sp) +; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 16($sp) ; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4) ; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4) ; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4) @@ -1564,12 +1564,6 @@ entry: ; ALL-LABEL: cast: -; MIPS32: addiu $sp, $sp, -32 -; MIPS32-DAG: sw $6, {{[0-9]+}}($sp) -; MIPS32-DAG: sw $7, {{[0-9]+}}($sp) -; MIPS32-DAG: lw ${{[0-9]+}}, 48($sp) -; MIPS32-DAG: lw ${{[0-9]+}}, 52($sp) - ; MIPS32R5-DAG: insert.w $w0[0], $6 ; MIPS32R5-DAG: insert.w $w0[1], $7 ; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp) Index: test/CodeGen/Mips/mips64-f128.ll =================================================================== --- test/CodeGen/Mips/mips64-f128.ll +++ test/CodeGen/Mips/mips64-f128.ll @@ -170,7 +170,7 @@ } ; ALL-LABEL: conv_UChar_LD: -; ALL: ld $25, %call16(__fixtfsi) +; ALL: ld $25, %call16(__fixunstfsi) define zeroext i8 @conv_UChar_LD(fp128 %a) { entry: @@ -179,7 +179,7 @@ } ; ALL-LABEL: conv_UShort_LD: -; ALL: ld $25, %call16(__fixtfsi) +; ALL: ld $25, %call16(__fixunstfsi) define zeroext i16 @conv_UShort_LD(fp128 %a) { entry: 
Index: test/CodeGen/Mips/msa/f16-llvm-ir.ll =================================================================== --- test/CodeGen/Mips/msa/f16-llvm-ir.ll +++ test/CodeGen/Mips/msa/f16-llvm-ir.ll @@ -118,70 +118,11 @@ %0 = load half, half * @h, align 2 %1 = fptoui half %0 to i32 -; MIPS32: lwc1 $f[[FC:[0-9]+]], %lo($CPI{{[0-9]+}}_{{[0-9]+}}) -; MIPS64-N32: lwc1 $f[[FC:[0-9]+]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) -; MIPS64-N64: lwc1 $f[[FC:[0-9]+]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) - -; ALL: lh $[[R0:[0-9]+]] -; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] -; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] -; ALL: copy_s.w $[[R1:[0-9]+]], $w[[W1]][0] -; ALL: mtc1 $[[R1]], $f[[F0:[0-9]+]] -; MIPSR6: cmp.lt.s $f[[F1:[0-9]+]], $f[[F0]], $f[[FC]] -; ALL: sub.s $f[[F2:[0-9]+]], $f[[F0]], $f[[FC]] -; ALL: mfc1 $[[R2:[0-9]]], $f[[F2]] -; ALL: fill.w $w[[W2:[0-9]+]], $[[R2]] -; ALL: fexdo.h $w[[W3:[0-9]+]], $w[[W2]], $w[[W2]] -; ALL: fexupr.w $w[[W4:[0-9]+]], $w[[W3]] -; ALL: fexupr.d $w[[W5:[0-9]+]], $w[[W4]] - -; MIPS32: copy_s.w $[[R3:[0-9]+]], $w[[W5]][0] -; MIPS32: mtc1 $[[R3]], $f[[F3:[0-9]+]] -; MIPS32: copy_s.w $[[R4:[0-9]+]], $w[[W5]][1] -; MIPS32: mthc1 $[[R3]], $f[[F3]] - -; MIPS64: copy_s.d $[[R2:[0-9]+]], $w[[W2]][0] -; MIPS64: dmtc1 $[[R2]], $f[[F3:[0-9]+]] - -; ALL: trunc.w.d $f[[F4:[0-9]+]], $f[[F3]] -; ALL: mfc1 $[[R4:[0-9]+]], $f[[F4]] -; ALL: fexupr.d $w[[W6:[0-9]+]], $w[[W1]] - -; MIPS32: copy_s.w $[[R5:[0-9]+]], $w[[W6]][0] -; MIPS32: mtc1 $[[R5]], $f[[F5:[0-9]+]] -; MIPS32: copy_s.w $[[R6:[0-9]+]], $w[[W6]][1] -; MIPS32: mthc1 $[[R6]], $f[[F5]] - -; MIPS64: copy_s.d $[[R2:[0-9]+]], $w[[W2]][0] -; MIPS64: dmtc1 $[[R2]], $f[[F5:[0-9]+]] - -; ALL: trunc.w.d $f[[F6:[0-9]]], $f[[F5]] -; ALL: mfc1 $[[R7:[0-9]]], $f[[F6]] - -; MIPS32R5-O32: lw $[[R13:[0-9]+]], %got($CPI{{[0-9]+}}_{{[0-9]+}}) -; MIPS32R5-O32: addiu $[[R14:[0-9]+]], $[[R13]], %lo($CPI{{[0-9]+}}_{{[0-9]+}}) - -; MIPS64R5-N32: lw $[[R13:[0-9]+]], %got_page(.LCPI{{[0-9]+}}_{{[0-9]+}}) -; MIPS64R5-N32: addiu 
$[[R14:[0-9]+]], $[[R13]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) - -; MIPS64R5-N64: ld $[[R13:[0-9]+]], %got_page(.LCPI{{[0-9]+}}_{{[0-9]+}}) -; MIPS64R5-N64: daddiu $[[R14:[0-9]+]], $[[R13]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) - -; ALL: lui $[[R8:[0-9]+]], 32768 -; ALL: xor $[[R9:[0-9]+]], $[[R4]], $[[R8]] - -; MIPSR5: lh $[[R15:[0-9]+]], 0($[[R14]]) -; MIPSR5: fill.h $w[[W7:[0-9]+]], $[[R15]] -; MIPSR5: fexupr.w $w[[W8:[0-9]+]], $w[[W7]] -; MIPSR5: copy_s.w $[[R16:[0-9]+]], $w[[W8]][0] -; MIPSR5: mtc1 $[[R16]], $f[[F7:[0-9]+]] -; MIPSR5: c.olt.s $f[[F0]], $f[[F7]] -; MIPSR5: movt $[[R9]], $[[R7]], $fcc0 - -; MIPSR6: mfc1 $[[R10:[0-9]+]], $f[[F1]] -; MIPSR6: seleqz $[[R11:[0-9]]], $[[R9]], $[[R10]] -; MIPSR6: selnez $[[R12:[0-9]]], $[[R7]], $[[R10]] -; MIPSR6: or $2, $[[R12]], $[[R11]] +; ALL: lh $[[R0:[0-9]+]] +; ALL: fill.h $w[[W0:[0-9]+]], $[[R0]] +; ALL: fexupr.w $w[[W1:[0-9]+]], $w[[W0]] +; ALL: ftint_u.w $w[[W2:[0-9]+]], $w[[W1]] +; ALL: copy_u.w $2, $w[[W2]][0] ret i32 %1 } @@ -214,36 +155,17 @@ entry: ; ALL-LABEL: uitofp: -; MIPS32-O32: ldc1 $f[[F0:[0-9]+]], %lo($CPI{{[0-9]+}}_{{[0-9]+}}) -; MIPS32-O32: ldc1 $f[[F1:[0-9]+]], 0($sp) - -; MIPS64-N32: ldc1 $f[[F0:[0-9]+]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) -; MIPS64-N32: ldc1 $f[[F1:[0-9]+]], 8($sp) - -; MIPS64-N64: ldc1 $f[[F0:[0-9]+]], %got_ofst(.LCPI{{[0-9]+}}_{{[0-9]+}}) -; MIPS64-N64: ldc1 $f[[F1:[0-9]+]], 8($sp) +; MIPS32: lw $[[R3:[0-9]+]], %got(h) +; MIPS32: fill.w $w[[W0:[0-9]+]], $4 +; MIPS32: ffint_u.w $w[[W1:[0-9]+]], $w[[W0]] -; MIPSR5: sub.d $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]] -; MIPSR6-O32: sub.d $f[[F2:[0-9]+]], $f[[F0]], $f[[F1]] -; MIPSR6-N32: sub.d $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]] -; MIPSR6-N64: sub.d $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]] - -; MIPS32: mfc1 $[[R0:[0-9]+]], $f[[F2]] -; MIPS32: fill.w $w[[W0:[0-9]+]], $[[R0]] -; MIPS32: mfhc1 $[[R1:[0-9]+]], $f[[F2]] -; MIPS32: insert.w $w[[W0]][1], $[[R1]] -; MIPS32: insert.w $w[[W0]][3], $[[R1]] - -; MIPS64-N64-DAG: ld 
$[[R3:[0-9]+]], %got_disp(h) +; MIPS64-DAG: sll $[[R1:[0-9]+]], $4, 0 ; MIPS64-N32-DAG: lw $[[R3:[0-9]+]], %got_disp(h) -; MIPS64-DAG: dmfc1 $[[R1:[0-9]+]], $f[[F2]] -; MIPS64-DAG: fill.d $w[[W0:[0-9]+]], $[[R1]] +; MIPS64-N64-DAG: ld $[[R3:[0-9]+]], %got_disp(h) +; MIPS64-DAG: fill.w $w[[W0:[0-9]+]], $[[R1]] -; ALL-DAG: fexdo.w $w[[W1:[0-9]+]], $w[[W0]], $w[[W0]] ; ALL-DAG: fexdo.h $w[[W2:[0-9]+]], $w[[W1]], $w[[W1]] -; MIPS32-DAG: lw $[[R3:[0-9]+]], %got(h) - ; ALL: copy_u.h $[[R2:[0-9]+]], $w[[W2]] ; ALL: sh $[[R2]], 0($[[R3]]) %0 = uitofp i32 %a to half