diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -121,6 +121,13 @@
 def FSTGT_S : FP_STORE_3R<0b00111000011101100, "fstgt.s", FPR32>;
 def FSTLE_S : FP_STORE_3R<0b00111000011101110, "fstle.s", FPR32>;
 
+// Pseudo instructions for spilling/reloading CFRs.
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+def PseudoST_CFR : Pseudo<(outs),
+                          (ins CFR:$ccd, GPR:$rj, grlenimm:$imm)>;
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def PseudoLD_CFR : Pseudo<(outs CFR:$ccd),
+                          (ins GPR:$rj, grlenimm:$imm)>;
 } // Predicates = [HasBasicF]
 
 //===----------------------------------------------------------------------===//
@@ -159,7 +166,7 @@
 
 class PatFPSetcc<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
     : Pat<(any_fsetcc RegTy:$fj, RegTy:$fk, cc),
-          (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>;
+          (CmpInst RegTy:$fj, RegTy:$fk)>;
 // SETOGT/SETOGE/SETUGT/SETUGE/SETGE/SETNE/SETGT will expand into
 // SETOLT/SETOLE/SETULT/SETULE/SETLE/SETEQ/SETLT.
 def : PatFPSetcc<SETOEQ, FCMP_CEQ_S, FPR32>;
@@ -200,7 +207,7 @@
 
 class PatStrictFsetccs<CondCode cc, LAInst CmpInst, RegisterClass RegTy>
     : Pat<(strict_fsetccs RegTy:$fj, RegTy:$fk, cc),
-          (MOVCF2GR (CmpInst RegTy:$fj, RegTy:$fk))>;
+          (CmpInst RegTy:$fj, RegTy:$fk)>;
 def : PatStrictFsetccs<SETOEQ, FCMP_SEQ_S, FPR32>;
 def : PatStrictFsetccs<SETOLT, FCMP_SLT_S, FPR32>;
 def : PatStrictFsetccs<SETOLE, FCMP_SLE_S, FPR32>;
@@ -215,8 +222,8 @@
 
 /// Select
 
-def : Pat<(select GPR:$cc, FPR32:$fk, FPR32:$fj),
-          (FSEL_S FPR32:$fj, FPR32:$fk, (MOVGR2CF GPR:$cc))>;
+def : Pat<(select CFR:$cc, FPR32:$fk, FPR32:$fj),
+          (FSEL_S FPR32:$fj, FPR32:$fk, CFR:$cc)>;
 
 /// Selectcc
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -212,8 +212,8 @@
 
 /// Select
 
-def : Pat<(select GPR:$cc, FPR64:$fk, FPR64:$fj),
-          (FSEL_D FPR64:$fj, FPR64:$fk, (MOVGR2CF GPR:$cc))>;
+def : Pat<(select CFR:$cc, FPR64:$fk, FPR64:$fj),
+          (FSEL_D FPR64:$fj, FPR64:$fk, CFR:$cc)>;
 
 /// Selectcc
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -13,6 +13,8 @@
 #include "LoongArchInstrInfo.h"
 #include "LoongArch.h"
 #include "LoongArchMachineFunctionInfo.h"
+#include "LoongArchRegisterInfo.h"
+#include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "MCTargetDesc/LoongArchMatInt.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 
@@ -37,6 +39,21 @@
     return;
   }
 
+  // GPR->CFR copy.
+  if (LoongArch::CFRRegClass.contains(DstReg) &&
+      LoongArch::GPRRegClass.contains(SrcReg)) {
+    BuildMI(MBB, MBBI, DL, get(LoongArch::MOVGR2CF), DstReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  // CFR->GPR copy.
+  if (LoongArch::GPRRegClass.contains(DstReg) &&
+      LoongArch::CFRRegClass.contains(SrcReg)) {
+    BuildMI(MBB, MBBI, DL, get(LoongArch::MOVCF2GR), DstReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+
   // FPR->FPR copies.
   unsigned Opc;
   if (LoongArch::FPR32RegClass.contains(DstReg, SrcReg)) {
@@ -71,6 +88,8 @@
     Opcode = LoongArch::FST_S;
   else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
     Opcode = LoongArch::FST_D;
+  else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::PseudoST_CFR;
   else
     llvm_unreachable("Can't store this register to stack slot");
 
@@ -104,6 +123,8 @@
     Opcode = LoongArch::FLD_S;
   else if (LoongArch::FPR64RegClass.hasSubClassEq(RC))
     Opcode = LoongArch::FLD_D;
+  else if (LoongArch::CFRRegClass.hasSubClassEq(RC))
+    Opcode = LoongArch::PseudoLD_CFR;
   else
     llvm_unreachable("Can't load this register from stack slot");
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -94,6 +94,13 @@
   if (TFI->hasBP(MF))
     markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp
 
+  // FIXME: To avoid generating COPY instructions between CFRs, only use $fcc0.
+  // This is required to work around the fact that a COPY instruction between
+  // CFRs is not provided in LoongArch.
+  if (MF.getSubtarget<LoongArchSubtarget>().hasBasicF())
+    for (size_t Reg = LoongArch::FCC1; Reg <= LoongArch::FCC7; ++Reg)
+      markSuperRegs(Reserved, Reg);
+
   assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
 }
@@ -124,6 +131,8 @@
   const LoongArchInstrInfo *TII = STI.getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   DebugLoc DL = MI.getDebugLoc();
+  bool IsLA64 = STI.is64Bit();
+  unsigned MIOpc = MI.getOpcode();
 
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
   Register FrameReg;
@@ -134,14 +143,14 @@
   bool FrameRegIsKill = false;
 
   if (!isInt<12>(Offset.getFixed())) {
-    unsigned Addi = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
-    unsigned Add = STI.is64Bit() ? LoongArch::ADD_D : LoongArch::ADD_W;
+    unsigned Addi = IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+    unsigned Add = IsLA64 ? LoongArch::ADD_D : LoongArch::ADD_W;
 
     // The offset won't fit in an immediate, so use a scratch register instead.
     // Modify Offset and FrameReg appropriately.
     Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
     TII->movImm(MBB, II, DL, ScratchReg, Offset.getFixed());
-    if (MI.getOpcode() == Addi) {
+    if (MIOpc == Addi) {
       BuildMI(MBB, II, DL, TII->get(Add), MI.getOperand(0).getReg())
           .addReg(FrameReg)
           .addReg(ScratchReg, RegState::Kill);
@@ -156,6 +165,33 @@
     FrameRegIsKill = true;
   }
 
+  // Spill CFRs.
+  if (MIOpc == LoongArch::PseudoST_CFR) {
+    Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
+    BuildMI(MBB, II, DL, TII->get(LoongArch::MOVCF2GR), ScratchReg)
+        .add(MI.getOperand(0));
+    BuildMI(MBB, II, DL, TII->get(IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
+        .addReg(ScratchReg, RegState::Kill)
+        .addReg(FrameReg)
+        .addImm(Offset.getFixed());
+    MI.eraseFromParent();
+    return;
+  }
+
+  // Reload CFRs.
+  if (MIOpc == LoongArch::PseudoLD_CFR) {
+    Register ScratchReg = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
+    BuildMI(MBB, II, DL, TII->get(IsLA64 ? LoongArch::LD_D : LoongArch::LD_W),
+            ScratchReg)
+        .addReg(FrameReg)
+        .addImm(Offset.getFixed());
+    BuildMI(MBB, II, DL, TII->get(LoongArch::MOVGR2CF))
+        .add(MI.getOperand(0))
+        .addReg(ScratchReg, RegState::Kill);
+    MI.eraseFromParent();
+    return;
+  }
+
   MI.getOperand(FIOperandNum)
       .ChangeToRegister(FrameReg, false, false, FrameRegIsKill);
   MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll
@@ -324,7 +324,6 @@
 ; LA64-NEXT:  # %bb.1: # %if.then
 ; LA64-NEXT:    ret
 ; LA64-NEXT:  .LBB17_2: # %if.else
-; LA64-NEXT:    fcmp.ceq.d $fcc0, $fa0, $fa1
 ; LA64-NEXT:    movcf2gr $a0, $fcc0
 ; LA64-NEXT:    ret
   %cmp = fcmp fast oeq double %a, 0.000000e+00
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-flt.ll
@@ -304,7 +304,6 @@
 ; LA32-NEXT:  # %bb.1: # %if.then
 ; LA32-NEXT:    ret
 ; LA32-NEXT:  .LBB17_2: # %if.else
-; LA32-NEXT:    fcmp.ceq.s $fcc0, $fa0, $fa1
 ; LA32-NEXT:    movcf2gr $a0, $fcc0
 ; LA32-NEXT:    ret
 ;
@@ -318,7 +317,6 @@
 ; LA64-NEXT:  # %bb.1: # %if.then
 ; LA64-NEXT:    ret
 ; LA64-NEXT:  .LBB17_2: # %if.else
-; LA64-NEXT:    fcmp.ceq.s $fcc0, $fa0, $fa1
 ; LA64-NEXT:    movcf2gr $a0, $fcc0
 ; LA64-NEXT:    ret
   %cmp = fcmp fast oeq float %a, 0.000000e+00
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll
@@ -492,9 +492,9 @@
 ; LA32F-NEXT:    ffint.s.w $fa0, $fa0
 ; LA32F-NEXT:    fadd.s $fa0, $fa0, $fa0
 ; LA32F-NEXT:    slti $a1, $a0, 0
-; LA32F-NEXT:    movgr2cf $fcc0, $a1
 ; LA32F-NEXT:    movgr2fr.w $fa1, $a0
 ; LA32F-NEXT:    ffint.s.w $fa1, $fa1
+; LA32F-NEXT:    movgr2cf $fcc0, $a1
 ; LA32F-NEXT:    fsel $fa0, $fa1, $fa0, $fcc0
 ; LA32F-NEXT:    ret
 ;
@@ -570,9 +570,9 @@
 ; LA64D-NEXT:    ffint.s.l $fa0, $fa0
 ; LA64D-NEXT:    fadd.s $fa0, $fa0, $fa0
 ; LA64D-NEXT:    slti $a1, $a0, 0
-; LA64D-NEXT:    movgr2cf $fcc0, $a1
 ; LA64D-NEXT:    movgr2fr.d $fa1, $a0
 ; LA64D-NEXT:    ffint.s.l $fa1, $fa1
+; LA64D-NEXT:    movgr2cf $fcc0, $a1
 ; LA64D-NEXT:    fsel $fa0, $fa1, $fa0, $fcc0
 ; LA64D-NEXT:    ret
   %1 = uitofp i64 %a to float
diff --git a/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll b/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/spill-reload-cfr.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
+
+;; Check that the $fcc* register is spilled before a function call and then reloaded.
+
+declare void @foo()
+
+define i1 @load_store_fcc_reg(float %a, i1 %c) {
+; LA32-LABEL: load_store_fcc_reg:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -32
+; LA32-NEXT:    .cfi_def_cfa_offset 32
+; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:    .cfi_offset 1, -4
+; LA32-NEXT:    .cfi_offset 22, -8
+; LA32-NEXT:    .cfi_offset 56, -16
+; LA32-NEXT:    .cfi_offset 57, -24
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    fmov.s $fs0, $fa0
+; LA32-NEXT:    movgr2fr.w $fs1, $zero
+; LA32-NEXT:    fcmp.cult.s $fcc0, $fs1, $fa0
+; LA32-NEXT:    movcf2gr $a0, $fcc0
+; LA32-NEXT:    st.w $a0, $sp, 4
+; LA32-NEXT:    bl %plt(foo)
+; LA32-NEXT:    ld.w $a0, $sp, 4
+; LA32-NEXT:    movgr2cf $fcc0, $a0
+; LA32-NEXT:    bcnez $fcc0, .LBB0_2
+; LA32-NEXT:  # %bb.1: # %if.then
+; LA32-NEXT:    move $a0, $fp
+; LA32-NEXT:    b .LBB0_3
+; LA32-NEXT:  .LBB0_2: # %if.else
+; LA32-NEXT:    fcmp.cle.s $fcc0, $fs0, $fs1
+; LA32-NEXT:    movcf2gr $a0, $fcc0
+; LA32-NEXT:  .LBB0_3: # %if.then
+; LA32-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_store_fcc_reg:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -48
+; LA64-NEXT:    .cfi_def_cfa_offset 48
+; LA64-NEXT:    st.d $ra, $sp, 40 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 32 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs0, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs1, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    .cfi_offset 1, -8
+; LA64-NEXT:    .cfi_offset 22, -16
+; LA64-NEXT:    .cfi_offset 56, -24
+; LA64-NEXT:    .cfi_offset 57, -32
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    fmov.s $fs0, $fa0
+; LA64-NEXT:    movgr2fr.w $fs1, $zero
+; LA64-NEXT:    fcmp.cult.s $fcc0, $fs1, $fa0
+; LA64-NEXT:    movcf2gr $a0, $fcc0
+; LA64-NEXT:    st.d $a0, $sp, 8
+; LA64-NEXT:    bl %plt(foo)
+; LA64-NEXT:    ld.d $a0, $sp, 8
+; LA64-NEXT:    movgr2cf $fcc0, $a0
+; LA64-NEXT:    bcnez $fcc0, .LBB0_2
+; LA64-NEXT:  # %bb.1: # %if.then
+; LA64-NEXT:    move $a0, $fp
+; LA64-NEXT:    b .LBB0_3
+; LA64-NEXT:  .LBB0_2: # %if.else
+; LA64-NEXT:    fcmp.cle.s $fcc0, $fs0, $fs1
+; LA64-NEXT:    movcf2gr $a0, $fcc0
+; LA64-NEXT:  .LBB0_3: # %if.then
+; LA64-NEXT:    fld.d $fs1, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    fld.d $fs0, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $fp, $sp, 32 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 40 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 48
+; LA64-NEXT:    ret
+  %cmp = fcmp ole float %a, 0.000000e+00
+  call void @foo()
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  ret i1 %c
+
+if.else:
+  ret i1 %cmp
+}
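
Aside for reviewers (not part of the patch): the two pseudos exist because a CFR cannot be stored to or loaded from memory directly, so eliminateFrameIndex expands them into a round trip through a scratch GPR, as the hunks above show. A minimal sketch of the LA64 expansion, assuming the spilled register is $fcc0 and the slot offset (8 here, as in the test) fits the 12-bit signed immediate; the scratch register is still virtual at this stage, and $a0 is merely what the register allocator happens to pick in the test:

	movcf2gr $a0, $fcc0    # PseudoST_CFR: copy the CFR into a scratch GPR,
	st.d     $a0, $sp, 8   #   then store the GPR to the stack slot
	bl       %plt(foo)     # the call clobbers $fcc0
	ld.d     $a0, $sp, 8   # PseudoLD_CFR: reload the saved value,
	movgr2cf $fcc0, $a0    #   then copy it back into the CFR

On LA32 the same expansion uses st.w/ld.w instead of st.d/ld.d.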