diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp --- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -19,8 +19,11 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -57,24 +60,39 @@ MachineBasicBlock::iterator &NextMBBI, unsigned FlagsHi, unsigned SecondOpcode, unsigned FlagsLo); + bool expandLargeAddressLoad(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LastOpcode, unsigned IdentifyingMO); + bool expandLargeAddressLoad(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LastOpcode, unsigned IdentifyingMO, + const MachineOperand &Symbol, Register DestReg, + bool EraseFromParent); bool expandLoadAddressPcrel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); + MachineBasicBlock::iterator &NextMBBI, + bool Large = false); bool expandLoadAddressGot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); + MachineBasicBlock::iterator &NextMBBI, + bool Large = false); bool expandLoadAddressTLSLE(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); bool expandLoadAddressTLSIE(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); + MachineBasicBlock::iterator &NextMBBI, + bool Large = false); bool expandLoadAddressTLSLD(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); + 
MachineBasicBlock::iterator &NextMBBI, + bool Large = false); bool expandLoadAddressTLSGD(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI); + MachineBasicBlock::iterator &NextMBBI, + bool Large = false); bool expandFunctionCALL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI, @@ -111,16 +129,26 @@ switch (MBBI->getOpcode()) { case LoongArch::PseudoLA_PCREL: return expandLoadAddressPcrel(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_PCREL_LARGE: + return expandLoadAddressPcrel(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_GOT: return expandLoadAddressGot(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_GOT_LARGE: + return expandLoadAddressGot(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_TLS_LE: return expandLoadAddressTLSLE(MBB, MBBI, NextMBBI); case LoongArch::PseudoLA_TLS_IE: return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_IE_LARGE: + return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_TLS_LD: return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_LD_LARGE: + return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoLA_TLS_GD: return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_GD_LARGE: + return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI, /*Large=*/true); case LoongArch::PseudoCALL: return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false); case LoongArch::PseudoTAIL: @@ -157,9 +185,114 @@ return true; } +bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, + unsigned IdentifyingMO) { + MachineInstr &MI = *MBBI; + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO, + MI.getOperand(2), 
MI.getOperand(0).getReg(), + true); +} + +bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, + unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg, + bool EraseFromParent) { + // Code Sequence: + // + // Part0: addi.d $dest, $zero, %Flags0(sym) + // Part2: lu32i.d $dest, %Flags2(sym) + // Part3: lu52i.d $dest, $dest, %Flags3(sym) + // Part1: pcalau12i $scratch, %Flags1(sym) + // Fin: LastOpcode $dest, $dest, $scratch + + unsigned Flags0, Flags1, Flags2, Flags3; + switch (IdentifyingMO) { + default: + llvm_unreachable("unsupported MO"); + case LoongArchII::MO_PCREL_LO: + Flags0 = IdentifyingMO; + Flags1 = LoongArchII::MO_PCREL_HI; + Flags2 = LoongArchII::MO_PCREL64_LO; + Flags3 = LoongArchII::MO_PCREL64_HI; + break; + case LoongArchII::MO_GOT_PC_HI: + case LoongArchII::MO_LD_PC_HI: + case LoongArchII::MO_GD_PC_HI: + // These cases relocate just like the GOT case, except for Part1. + Flags1 = IdentifyingMO; + Flags0 = LoongArchII::MO_GOT_PC_LO; + Flags2 = LoongArchII::MO_GOT_PC64_LO; + Flags3 = LoongArchII::MO_GOT_PC64_HI; + break; + case LoongArchII::MO_IE_PC_LO: + Flags0 = IdentifyingMO; + Flags1 = LoongArchII::MO_IE_PC_HI; + Flags2 = LoongArchII::MO_IE_PC64_LO; + Flags3 = LoongArchII::MO_IE_PC64_HI; + break; + } + + MachineFunction *MF = MBB.getParent(); + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + Register TmpPart1 = + MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); + Register TmpPart0 = + DestReg.isVirtual() + ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) + : DestReg; + Register TmpPart02 = + DestReg.isVirtual() + ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) + : DestReg; + Register TmpPart023 = + DestReg.isVirtual() + ? 
MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) + : DestReg; + + auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), TmpPart0) + .addReg(LoongArch::R0); + auto Part2 = + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), TmpPart02) + .addReg(TmpPart0, + RegState::Kill); // "rj" is needed due to InstrInfo pattern + auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), TmpPart023) + .addReg(TmpPart02, RegState::Kill); + auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), TmpPart1); + BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg) + .addReg(TmpPart023) + .addReg(TmpPart1, RegState::Kill); + + if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) { + const char *SymName = Symbol.getSymbolName(); + Part0.addExternalSymbol(SymName, Flags0); + Part1.addExternalSymbol(SymName, Flags1); + Part2.addExternalSymbol(SymName, Flags2); + Part3.addExternalSymbol(SymName, Flags3); + } else { + Part0.addDisp(Symbol, 0, Flags0); + Part1.addDisp(Symbol, 0, Flags1); + Part2.addDisp(Symbol, 0, Flags2); + Part3.addDisp(Symbol, 0, Flags3); + } + + if (EraseFromParent) + MI.eraseFromParent(); + + return true; +} + bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { + MachineBasicBlock::iterator &NextMBBI, bool Large) { + if (Large) + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_PCREL_LO); + // Code Sequence: // pcalau12i $rd, %pc_hi20(sym) // addi.w/d $rd, $rd, %pc_lo12(sym) @@ -172,7 +305,11 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressGot( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { + MachineBasicBlock::iterator &NextMBBI, bool Large) { + if (Large) + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_GOT_PC_HI); + // Code Sequence: // pcalau12i $rd, %got_pc_hi20(sym) // 
ld.w/d $rd, $rd, %got_pc_lo12(sym) @@ -189,29 +326,55 @@ // Code Sequence: // lu12i.w $rd, %le_hi20(sym) // ori $rd, $rd, %le_lo12(sym) + // + // And additionally if generating code using the large code model: + // + // lu32i.d $rd, %le64_lo20(sym) + // lu52i.d $rd, $rd, %le64_hi12(sym) MachineFunction *MF = MBB.getParent(); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); + bool Large = MF->getTarget().getCodeModel() == CodeModel::Large; Register DestReg = MI.getOperand(0).getReg(); - Register ScratchReg = + Register Part01 = + Large ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) + : DestReg; + Register Part1 = MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); MachineOperand &Symbol = MI.getOperand(1); - BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), ScratchReg) + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), Part1) .addDisp(Symbol, 0, LoongArchII::MO_LE_HI); - BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), DestReg) - .addReg(ScratchReg) + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), Part01) + .addReg(Part1, RegState::Kill) .addDisp(Symbol, 0, LoongArchII::MO_LE_LO); + if (Large) { + Register Part012 = + MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); + + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), Part012) + .addReg(Part01, + RegState::Kill) // "rj" is needed due to InstrInfo pattern + .addDisp(Symbol, 0, LoongArchII::MO_LE64_LO); + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), DestReg) + .addReg(Part012, RegState::Kill) + .addDisp(Symbol, 0, LoongArchII::MO_LE64_HI); + } + MI.eraseFromParent(); return true; } bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { + MachineBasicBlock::iterator &NextMBBI, bool Large) { + if (Large) + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_IE_PC_LO); + // Code Sequence: // pcalau12i $rd, 
%ie_pc_hi20(sym) // ld.w/d $rd, $rd, %ie_pc_lo12(sym) @@ -224,7 +387,11 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { + MachineBasicBlock::iterator &NextMBBI, bool Large) { + if (Large) + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_LD_PC_HI); + // Code Sequence: // pcalau12i $rd, %ld_pc_hi20(sym) // addi.w/d $rd, $rd, %got_pc_lo12(sym) @@ -237,7 +404,11 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI) { + MachineBasicBlock::iterator &NextMBBI, bool Large) { + if (Large) + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_GD_PC_HI); + // Code Sequence: // pcalau12i $rd, %gd_pc_hi20(sym) // addi.w/d $rd, $rd, %got_pc_lo12(sym) @@ -299,6 +470,20 @@ CALL.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_LO); break; } + case CodeModel::Large: { + // Load address the "large" way, then JIRL_TAIL or JIRL_CALL to $addr. + Opcode = + IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; + Register AddrReg = + IsTailCall + ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) + : LoongArch::R1; + + expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_PCREL_LO, Func, AddrReg, false); + CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0); + break; + } } // Transfer implicit operands. 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -210,9 +210,9 @@ template SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, - unsigned Opc) const; + unsigned Opc, bool Large = false) const; SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, - unsigned Opc) const; + unsigned Opc, bool Large = false) const; SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -22,9 +22,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsLoongArch.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" @@ -467,16 +470,40 @@ SDLoc DL(N); EVT Ty = getPointerTy(DAG.getDataLayout()); SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); - // TODO: Check CodeModel. - if (IsLocal) - // This generates the pattern (PseudoLA_PCREL sym), which expands to - // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). - return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), - 0); - // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d - // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). 
- return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0); + switch (DAG.getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model"); + + case CodeModel::Large: { + // This is not actually used, but is necessary for successfully matching + // PseudoLA_PCREL_LARGE. + SDValue Tmp = DAG.getConstant(0, DL, Ty); + if (IsLocal) + // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym). + return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty, + Tmp, Addr), + 0); + + // This generates the pattern (PseudoLA_GOT_LARGE tmp sym). + return SDValue( + DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr), + 0); + } + + case CodeModel::Small: + case CodeModel::Medium: + if (IsLocal) + // This generates the pattern (PseudoLA_PCREL sym), which expands to + // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). + return SDValue( + DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0); + + // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d + // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). + return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), + 0); + } } SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, @@ -503,13 +530,19 @@ SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, - unsigned Opc) const { + unsigned Opc, + bool Large) const { SDLoc DL(N); EVT Ty = getPointerTy(DAG.getDataLayout()); MVT GRLenVT = Subtarget.getGRLenVT(); + // This is not actually used, but is necessary for successfully matching the + // PseudoLA_*_LARGE nodes. + SDValue Tmp = DAG.getConstant(0, DL, Ty); SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); - SDValue Offset = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); + SDValue Offset = Large + ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) + : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); // Add the thread pointer. 
return DAG.getNode(ISD::ADD, DL, Ty, Offset, @@ -518,14 +551,20 @@ SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, - unsigned Opc) const { + unsigned Opc, + bool Large) const { SDLoc DL(N); EVT Ty = getPointerTy(DAG.getDataLayout()); IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); + // This is not actually used, but is necessary for successfully matching the + // PseudoLA_*_LARGE nodes. + SDValue Tmp = DAG.getConstant(0, DL, Ty); + // Use a PC-relative addressing mode to access the dynamic GOT address. SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); - SDValue Load = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); + SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) + : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); // Prepare argument list to generate call. ArgListTy Args; @@ -552,6 +591,8 @@ CallingConv::GHC) report_fatal_error("In GHC calling convention TLS is not supported"); + bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large; + GlobalAddressSDNode *N = cast(Op); assert(N->getOffset() == 0 && "unexpected offset in global node"); @@ -561,20 +602,31 @@ // In this model, application code calls the dynamic linker function // __tls_get_addr to locate TLS offsets into the dynamic thread vector at // runtime. - Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_GD); + Addr = getDynamicTLSAddr(N, DAG, + Large ? LoongArch::PseudoLA_TLS_GD_LARGE + : LoongArch::PseudoLA_TLS_GD, + Large); break; case TLSModel::LocalDynamic: // Same as GeneralDynamic, except for assembly modifiers and relocation // records. - Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LD); + Addr = getDynamicTLSAddr(N, DAG, + Large ? LoongArch::PseudoLA_TLS_LD_LARGE + : LoongArch::PseudoLA_TLS_LD, + Large); break; case TLSModel::InitialExec: // This model uses the GOT to resolve TLS offsets. 
- Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_IE); + Addr = getStaticTLSAddr(N, DAG, + Large ? LoongArch::PseudoLA_TLS_IE_LARGE + : LoongArch::PseudoLA_TLS_IE, + Large); break; case TLSModel::LocalExec: // This model is used when static linking as the TLS offsets are resolved // during program linking. + // + // This node doesn't need an extra argument for the large code model. Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE); break; } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -478,12 +478,20 @@ {MO_CALL_PLT, "loongarch-call-plt"}, {MO_PCREL_HI, "loongarch-pcrel-hi"}, {MO_PCREL_LO, "loongarch-pcrel-lo"}, + {MO_PCREL64_LO, "loongarch-pcrel64-lo"}, + {MO_PCREL64_HI, "loongarch-pcrel64-hi"}, {MO_GOT_PC_HI, "loongarch-got-pc-hi"}, {MO_GOT_PC_LO, "loongarch-got-pc-lo"}, + {MO_GOT_PC64_LO, "loongarch-got-pc64-lo"}, + {MO_GOT_PC64_HI, "loongarch-got-pc64-hi"}, {MO_LE_HI, "loongarch-le-hi"}, {MO_LE_LO, "loongarch-le-lo"}, + {MO_LE64_LO, "loongarch-le64-lo"}, + {MO_LE64_HI, "loongarch-le64-hi"}, {MO_IE_PC_HI, "loongarch-ie-pc-hi"}, {MO_IE_PC_LO, "loongarch-ie-pc-lo"}, + {MO_IE_PC64_LO, "loongarch-ie-pc64-lo"}, + {MO_IE_PC64_HI, "loongarch-ie-pc64-hi"}, {MO_LD_PC_HI, "loongarch-ld-pc-hi"}, {MO_GD_PC_HI, "loongarch-gd-pc-hi"}}; return ArrayRef(TargetFlags); diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp --- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp @@ -47,24 +47,48 @@ case LoongArchII::MO_PCREL_LO: Kind = LoongArchMCExpr::VK_LoongArch_PCALA_LO12; break; + case LoongArchII::MO_PCREL64_LO: + Kind = LoongArchMCExpr::VK_LoongArch_PCALA64_LO20; + break; + case LoongArchII::MO_PCREL64_HI: + Kind = LoongArchMCExpr::VK_LoongArch_PCALA64_HI12; + 
break; case LoongArchII::MO_GOT_PC_HI: Kind = LoongArchMCExpr::VK_LoongArch_GOT_PC_HI20; break; case LoongArchII::MO_GOT_PC_LO: Kind = LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12; break; + case LoongArchII::MO_GOT_PC64_LO: + Kind = LoongArchMCExpr::VK_LoongArch_GOT64_PC_LO20; + break; + case LoongArchII::MO_GOT_PC64_HI: + Kind = LoongArchMCExpr::VK_LoongArch_GOT64_PC_HI12; + break; case LoongArchII::MO_LE_HI: Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20; break; case LoongArchII::MO_LE_LO: Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12; break; + case LoongArchII::MO_LE64_LO: + Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE64_LO20; + break; + case LoongArchII::MO_LE64_HI: + Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE64_HI12; + break; case LoongArchII::MO_IE_PC_HI: Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_HI20; break; case LoongArchII::MO_IE_PC_LO: Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12; break; + case LoongArchII::MO_IE_PC64_LO: + Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_LO20; + break; + case LoongArchII::MO_IE_PC64_HI: + Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_HI12; + break; case LoongArchII::MO_LD_PC_HI: Kind = LoongArchMCExpr::VK_LoongArch_TLS_LD_PC_HI20; break; diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h @@ -31,12 +31,20 @@ MO_CALL_PLT, MO_PCREL_HI, MO_PCREL_LO, + MO_PCREL64_LO, + MO_PCREL64_HI, MO_GOT_PC_HI, MO_GOT_PC_LO, + MO_GOT_PC64_LO, + MO_GOT_PC64_HI, MO_LE_HI, MO_LE_LO, + MO_LE64_LO, + MO_LE64_HI, MO_IE_PC_HI, MO_IE_PC_LO, + MO_IE_PC64_LO, + MO_IE_PC64_HI, MO_LD_PC_HI, MO_GD_PC_HI, // TODO: Add more flags. 
diff --git a/llvm/test/CodeGen/LoongArch/codemodel-medium.ll b/llvm/test/CodeGen/LoongArch/code-models.ll rename from llvm/test/CodeGen/LoongArch/codemodel-medium.ll rename to llvm/test/CodeGen/LoongArch/code-models.ll --- a/llvm/test/CodeGen/LoongArch/codemodel-medium.ll +++ b/llvm/test/CodeGen/LoongArch/code-models.ll @@ -3,6 +3,8 @@ ; RUN: FileCheck --check-prefix=SMALL %s ; RUN: llc --mtriple=loongarch64 --code-model=medium < %s | \ ; RUN: FileCheck --check-prefix=MEDIUM %s +; RUN: llc --mtriple=loongarch64 --code-model=large < %s | \ +; RUN: FileCheck --check-prefix=LARGE %s declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) declare i32 @callee(i32) @@ -26,6 +28,20 @@ ; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; MEDIUM-NEXT: addi.d $sp, $sp, 16 ; MEDIUM-NEXT: ret +; +; LARGE-LABEL: call_globaladdress: +; LARGE: # %bb.0: +; LARGE-NEXT: addi.d $sp, $sp, -16 +; LARGE-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LARGE-NEXT: addi.d $ra, $zero, %pc_lo12(callee) +; LARGE-NEXT: lu32i.d $ra, %pc64_lo20(callee) +; LARGE-NEXT: lu52i.d $ra, $ra, %pc64_hi12(callee) +; LARGE-NEXT: pcalau12i $a1, %pc_hi20(callee) +; LARGE-NEXT: add.d $ra, $ra, $a1 +; LARGE-NEXT: jirl $ra, $ra, 0 +; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LARGE-NEXT: addi.d $sp, $sp, 16 +; LARGE-NEXT: ret %1 = call i32 @callee(i32 %a) ret i32 %1 } @@ -57,6 +73,24 @@ ; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; MEDIUM-NEXT: addi.d $sp, $sp, 16 ; MEDIUM-NEXT: ret +; +; LARGE-LABEL: call_external_sym: +; LARGE: # %bb.0: # %entry +; LARGE-NEXT: addi.d $sp, $sp, -16 +; LARGE-NEXT: .cfi_def_cfa_offset 16 +; LARGE-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LARGE-NEXT: .cfi_offset 1, -8 +; LARGE-NEXT: ori $a2, $zero, 1000 +; LARGE-NEXT: move $a1, $zero +; LARGE-NEXT: addi.d $ra, $zero, %pc_lo12(memset) +; LARGE-NEXT: lu32i.d $ra, %pc64_lo20(memset) +; LARGE-NEXT: lu52i.d $ra, $ra, %pc64_hi12(memset) +; LARGE-NEXT: pcalau12i $a3, %pc_hi20(memset) +; LARGE-NEXT: add.d $ra, 
$ra, $a3 +; LARGE-NEXT: jirl $ra, $ra, 0 +; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LARGE-NEXT: addi.d $sp, $sp, 16 +; LARGE-NEXT: ret entry: call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 1000, i1 false) ret void @@ -73,6 +107,15 @@ ; MEDIUM: # %bb.0: # %entry ; MEDIUM-NEXT: pcalau12i $a1, %pc_hi20(callee_tail) ; MEDIUM-NEXT: jirl $zero, $a1, %pc_lo12(callee_tail) +; +; LARGE-LABEL: caller_tail: +; LARGE: # %bb.0: # %entry +; LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(callee_tail) +; LARGE-NEXT: lu32i.d $a1, %pc64_lo20(callee_tail) +; LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(callee_tail) +; LARGE-NEXT: pcalau12i $a2, %pc_hi20(callee_tail) +; LARGE-NEXT: add.d $a1, $a1, $a2 +; LARGE-NEXT: jr $a1 entry: %r = tail call i32 @callee_tail(i32 %i) ret i32 %r diff --git a/llvm/test/CodeGen/LoongArch/global-address.ll b/llvm/test/CodeGen/LoongArch/global-address.ll --- a/llvm/test/CodeGen/LoongArch/global-address.ll +++ b/llvm/test/CodeGen/LoongArch/global-address.ll @@ -3,6 +3,8 @@ ; RUN: llc --mtriple=loongarch32 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA32PIC ; RUN: llc --mtriple=loongarch64 --relocation-model=static < %s | FileCheck %s --check-prefix=LA64NOPIC ; RUN: llc --mtriple=loongarch64 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64PIC +; RUN: llc --mtriple=loongarch64 --code-model=large --relocation-model=static < %s | FileCheck %s --check-prefix=LA64LARGENOPIC +; RUN: llc --mtriple=loongarch64 --code-model=large --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64LARGEPIC @g = dso_local global i32 zeroinitializer, align 4 @G = global i32 zeroinitializer, align 4 @@ -47,6 +49,38 @@ ; LA64PIC-NEXT: addi.d $a0, $a0, %pc_lo12(.Lg$local) ; LA64PIC-NEXT: ld.w $a0, $a0, 0 ; LA64PIC-NEXT: ret +; +; LA64LARGENOPIC-LABEL: foo: +; LA64LARGENOPIC: # %bb.0: +; LA64LARGENOPIC-NEXT: addi.d $a0, $zero, %got_pc_lo12(G) +; LA64LARGENOPIC-NEXT: lu32i.d $a0, %got64_pc_lo20(G) +; LA64LARGENOPIC-NEXT: lu52i.d $a0, $a0, 
%got64_pc_hi12(G) +; LA64LARGENOPIC-NEXT: pcalau12i $a1, %got_pc_hi20(G) +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $a1 +; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0 +; LA64LARGENOPIC-NEXT: addi.d $a0, $zero, %pc_lo12(g) +; LA64LARGENOPIC-NEXT: lu32i.d $a0, %pc64_lo20(g) +; LA64LARGENOPIC-NEXT: lu52i.d $a0, $a0, %pc64_hi12(g) +; LA64LARGENOPIC-NEXT: pcalau12i $a1, %pc_hi20(g) +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $a1 +; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0 +; LA64LARGENOPIC-NEXT: ret +; +; LA64LARGEPIC-LABEL: foo: +; LA64LARGEPIC: # %bb.0: +; LA64LARGEPIC-NEXT: addi.d $a0, $zero, %got_pc_lo12(G) +; LA64LARGEPIC-NEXT: lu32i.d $a0, %got64_pc_lo20(G) +; LA64LARGEPIC-NEXT: lu52i.d $a0, $a0, %got64_pc_hi12(G) +; LA64LARGEPIC-NEXT: pcalau12i $a1, %got_pc_hi20(G) +; LA64LARGEPIC-NEXT: add.d $a0, $a0, $a1 +; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0 +; LA64LARGEPIC-NEXT: addi.d $a0, $zero, %pc_lo12(.Lg$local) +; LA64LARGEPIC-NEXT: lu32i.d $a0, %pc64_lo20(.Lg$local) +; LA64LARGEPIC-NEXT: lu52i.d $a0, $a0, %pc64_hi12(.Lg$local) +; LA64LARGEPIC-NEXT: pcalau12i $a1, %pc_hi20(.Lg$local) +; LA64LARGEPIC-NEXT: add.d $a0, $a0, $a1 +; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0 +; LA64LARGEPIC-NEXT: ret %V = load volatile i32, ptr @G %v = load volatile i32, ptr @g ret void diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll --- a/llvm/test/CodeGen/LoongArch/tls-models.ll +++ b/llvm/test/CodeGen/LoongArch/tls-models.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA32PIC ; RUN: llc --mtriple=loongarch64 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64PIC +; RUN: llc --mtriple=loongarch64 --code-model=large --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64LARGEPIC ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32NOPIC ; RUN: llc --mtriple=loongarch64 < %s | FileCheck 
%s --check-prefix=LA64NOPIC +; RUN: llc --mtriple=loongarch64 --code-model=large < %s | FileCheck %s --check-prefix=LA64LARGENOPIC ;; Check that TLS symbols are lowered correctly based on the specified ;; model. Make sure they're external to avoid them all being optimised to Local @@ -38,6 +40,25 @@ ; LA64PIC-NEXT: addi.d $sp, $sp, 16 ; LA64PIC-NEXT: ret ; +; LA64LARGEPIC-LABEL: f1: +; LA64LARGEPIC: # %bb.0: # %entry +; LA64LARGEPIC-NEXT: addi.d $sp, $sp, -16 +; LA64LARGEPIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64LARGEPIC-NEXT: addi.d $a0, $zero, %got_pc_lo12(unspecified) +; LA64LARGEPIC-NEXT: lu32i.d $a0, %got64_pc_lo20(unspecified) +; LA64LARGEPIC-NEXT: lu52i.d $a0, $a0, %got64_pc_hi12(unspecified) +; LA64LARGEPIC-NEXT: pcalau12i $a1, %gd_pc_hi20(unspecified) +; LA64LARGEPIC-NEXT: add.d $a0, $a0, $a1 +; LA64LARGEPIC-NEXT: addi.d $ra, $zero, %pc_lo12(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu32i.d $ra, %pc64_lo20(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr) +; LA64LARGEPIC-NEXT: pcalau12i $a1, %pc_hi20(__tls_get_addr) +; LA64LARGEPIC-NEXT: add.d $ra, $ra, $a1 +; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 +; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 +; LA64LARGEPIC-NEXT: ret +; ; LA32NOPIC-LABEL: f1: ; LA32NOPIC: # %bb.0: # %entry ; LA32NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(unspecified) @@ -51,6 +72,16 @@ ; LA64NOPIC-NEXT: ld.d $a0, $a0, %ie_pc_lo12(unspecified) ; LA64NOPIC-NEXT: add.d $a0, $a0, $tp ; LA64NOPIC-NEXT: ret +; +; LA64LARGENOPIC-LABEL: f1: +; LA64LARGENOPIC: # %bb.0: # %entry +; LA64LARGENOPIC-NEXT: addi.d $a0, $zero, %ie_pc_lo12(unspecified) +; LA64LARGENOPIC-NEXT: lu32i.d $a0, %ie64_pc_lo20(unspecified) +; LA64LARGENOPIC-NEXT: lu52i.d $a0, $a0, %ie64_pc_hi12(unspecified) +; LA64LARGENOPIC-NEXT: pcalau12i $a1, %ie_pc_hi20(unspecified) +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $a1 +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGENOPIC-NEXT: ret 
entry: ret ptr @unspecified } @@ -80,6 +111,25 @@ ; LA64PIC-NEXT: addi.d $sp, $sp, 16 ; LA64PIC-NEXT: ret ; +; LA64LARGEPIC-LABEL: f2: +; LA64LARGEPIC: # %bb.0: # %entry +; LA64LARGEPIC-NEXT: addi.d $sp, $sp, -16 +; LA64LARGEPIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64LARGEPIC-NEXT: addi.d $a0, $zero, %got_pc_lo12(ld) +; LA64LARGEPIC-NEXT: lu32i.d $a0, %got64_pc_lo20(ld) +; LA64LARGEPIC-NEXT: lu52i.d $a0, $a0, %got64_pc_hi12(ld) +; LA64LARGEPIC-NEXT: pcalau12i $a1, %ld_pc_hi20(ld) +; LA64LARGEPIC-NEXT: add.d $a0, $a0, $a1 +; LA64LARGEPIC-NEXT: addi.d $ra, $zero, %pc_lo12(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu32i.d $ra, %pc64_lo20(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu52i.d $ra, $ra, %pc64_hi12(__tls_get_addr) +; LA64LARGEPIC-NEXT: pcalau12i $a1, %pc_hi20(__tls_get_addr) +; LA64LARGEPIC-NEXT: add.d $ra, $ra, $a1 +; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 +; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 +; LA64LARGEPIC-NEXT: ret +; ; LA32NOPIC-LABEL: f2: ; LA32NOPIC: # %bb.0: # %entry ; LA32NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) @@ -93,6 +143,16 @@ ; LA64NOPIC-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld) ; LA64NOPIC-NEXT: add.d $a0, $a0, $tp ; LA64NOPIC-NEXT: ret +; +; LA64LARGENOPIC-LABEL: f2: +; LA64LARGENOPIC: # %bb.0: # %entry +; LA64LARGENOPIC-NEXT: addi.d $a0, $zero, %ie_pc_lo12(ld) +; LA64LARGENOPIC-NEXT: lu32i.d $a0, %ie64_pc_lo20(ld) +; LA64LARGENOPIC-NEXT: lu52i.d $a0, $a0, %ie64_pc_hi12(ld) +; LA64LARGENOPIC-NEXT: pcalau12i $a1, %ie_pc_hi20(ld) +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $a1 +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGENOPIC-NEXT: ret entry: ret ptr @ld } @@ -114,6 +174,16 @@ ; LA64PIC-NEXT: add.d $a0, $a0, $tp ; LA64PIC-NEXT: ret ; +; LA64LARGEPIC-LABEL: f3: +; LA64LARGEPIC: # %bb.0: # %entry +; LA64LARGEPIC-NEXT: addi.d $a0, $zero, %ie_pc_lo12(ie) +; LA64LARGEPIC-NEXT: lu32i.d $a0, %ie64_pc_lo20(ie) +; LA64LARGEPIC-NEXT: lu52i.d $a0, $a0, %ie64_pc_hi12(ie) +; 
LA64LARGEPIC-NEXT: pcalau12i $a1, %ie_pc_hi20(ie) +; LA64LARGEPIC-NEXT: add.d $a0, $a0, $a1 +; LA64LARGEPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGEPIC-NEXT: ret +; ; LA32NOPIC-LABEL: f3: ; LA32NOPIC: # %bb.0: # %entry ; LA32NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) @@ -127,6 +197,16 @@ ; LA64NOPIC-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) ; LA64NOPIC-NEXT: add.d $a0, $a0, $tp ; LA64NOPIC-NEXT: ret +; +; LA64LARGENOPIC-LABEL: f3: +; LA64LARGENOPIC: # %bb.0: # %entry +; LA64LARGENOPIC-NEXT: addi.d $a0, $zero, %ie_pc_lo12(ie) +; LA64LARGENOPIC-NEXT: lu32i.d $a0, %ie64_pc_lo20(ie) +; LA64LARGENOPIC-NEXT: lu52i.d $a0, $a0, %ie64_pc_hi12(ie) +; LA64LARGENOPIC-NEXT: pcalau12i $a1, %ie_pc_hi20(ie) +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $a1 +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGENOPIC-NEXT: ret entry: ret ptr @ie } @@ -148,6 +228,15 @@ ; LA64PIC-NEXT: add.d $a0, $a0, $tp ; LA64PIC-NEXT: ret ; +; LA64LARGEPIC-LABEL: f4: +; LA64LARGEPIC: # %bb.0: # %entry +; LA64LARGEPIC-NEXT: lu12i.w $a0, %le_hi20(le) +; LA64LARGEPIC-NEXT: ori $a0, $a0, %le_lo12(le) +; LA64LARGEPIC-NEXT: lu32i.d $a0, %le64_lo20(le) +; LA64LARGEPIC-NEXT: lu52i.d $a0, $a0, %le64_hi12(le) +; LA64LARGEPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGEPIC-NEXT: ret +; ; LA32NOPIC-LABEL: f4: ; LA32NOPIC: # %bb.0: # %entry ; LA32NOPIC-NEXT: lu12i.w $a0, %le_hi20(le) @@ -161,6 +250,15 @@ ; LA64NOPIC-NEXT: ori $a0, $a0, %le_lo12(le) ; LA64NOPIC-NEXT: add.d $a0, $a0, $tp ; LA64NOPIC-NEXT: ret +; +; LA64LARGENOPIC-LABEL: f4: +; LA64LARGENOPIC: # %bb.0: # %entry +; LA64LARGENOPIC-NEXT: lu12i.w $a0, %le_hi20(le) +; LA64LARGENOPIC-NEXT: ori $a0, $a0, %le_lo12(le) +; LA64LARGENOPIC-NEXT: lu32i.d $a0, %le64_lo20(le) +; LA64LARGENOPIC-NEXT: lu52i.d $a0, $a0, %le64_hi12(le) +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGENOPIC-NEXT: ret entry: ret ptr @le }