diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -62,6 +62,18 @@
   bool expandLoadAddressGot(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             MachineBasicBlock::iterator &NextMBBI);
+  bool expandLoadAddressTLSLE(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MachineBasicBlock::iterator &NextMBBI);
+  bool expandLoadAddressTLSIE(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MachineBasicBlock::iterator &NextMBBI);
+  bool expandLoadAddressTLSLD(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MachineBasicBlock::iterator &NextMBBI);
+  bool expandLoadAddressTLSGD(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              MachineBasicBlock::iterator &NextMBBI);
 };
 
 char LoongArchPreRAExpandPseudo::ID = 0;
@@ -96,6 +108,14 @@
     return expandLoadAddressPcrel(MBB, MBBI, NextMBBI);
   case LoongArch::PseudoLA_GOT:
     return expandLoadAddressGot(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_LE:
+    return expandLoadAddressTLSLE(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_IE:
+    return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_LD:
+    return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI);
+  case LoongArch::PseudoLA_TLS_GD:
+    return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI);
   }
   return false;
 }
@@ -148,6 +168,82 @@
                                  SecondOpcode, LoongArchII::MO_GOT_PC_LO);
 }
 
+/// Expand PseudoLA_TLS_LE into a lu12i.w/ori pair whose symbol operands carry
+/// the MO_LE_HI and MO_LE_LO target flags.
+bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  // Code Sequence:
+  // lu12i.w $rd, %le_hi20(sym)
+  // ori $rd, $rd, %le_lo12(sym)
+  MachineFunction *MF = MBB.getParent();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register DestReg = MI.getOperand(0).getReg();
+  // The high part goes through a fresh virtual register; only the final ori
+  // writes the pseudo's destination register.
+  Register ScratchReg =
+      MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+  MachineOperand &Symbol = MI.getOperand(1);
+
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), ScratchReg)
+      .addDisp(Symbol, 0, LoongArchII::MO_LE_HI);
+
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), DestReg)
+      .addReg(ScratchReg)
+      .addDisp(Symbol, 0, LoongArchII::MO_LE_LO);
+
+  // The pseudo is now fully replaced by the two instructions built above.
+  MI.eraseFromParent();
+  return true;
+}
+
+/// Expand PseudoLA_TLS_IE into pcalau12i followed by a load: ld.d on 64-bit
+/// subtargets, ld.w otherwise.
+bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  // Code Sequence:
+  // pcalau12i $rd, %ie_pc_hi20(sym)
+  // ld.w/d $rd, $rd, %ie_pc_lo12(sym)
+  MachineFunction *MF = MBB.getParent();
+  const auto &STI = MF->getSubtarget<LoongArchSubtarget>();
+  unsigned SecondOpcode = STI.is64Bit() ? LoongArch::LD_D : LoongArch::LD_W;
+  return expandPcalau12iInstPair(MBB, MBBI, NextMBBI, LoongArchII::MO_IE_PC_HI,
+                                 SecondOpcode, LoongArchII::MO_IE_PC_LO);
+}
+
+/// Expand PseudoLA_TLS_LD into pcalau12i followed by an add-immediate: addi.d
+/// on 64-bit subtargets, addi.w otherwise.
+bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  // Code Sequence:
+  // pcalau12i $rd, %ld_pc_hi20(sym)
+  // addi.w/d $rd, $rd, %got_pc_lo12(sym)
+  MachineFunction *MF = MBB.getParent();
+  const auto &STI = MF->getSubtarget<LoongArchSubtarget>();
+  unsigned SecondOpcode = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+  return expandPcalau12iInstPair(MBB, MBBI, NextMBBI, LoongArchII::MO_LD_PC_HI,
+                                 SecondOpcode, LoongArchII::MO_GOT_PC_LO);
+}
+
+/// Expand PseudoLA_TLS_GD into pcalau12i followed by an add-immediate: addi.d
+/// on 64-bit subtargets, addi.w otherwise.
+bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  // Code Sequence:
+  // pcalau12i $rd, %gd_pc_hi20(sym)
+  // addi.w/d $rd, $rd, %got_pc_lo12(sym)
+  MachineFunction *MF = MBB.getParent();
+  const auto &STI = MF->getSubtarget<LoongArchSubtarget>();
+  unsigned SecondOpcode = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+  return expandPcalau12iInstPair(MBB, MBBI, NextMBBI, LoongArchII::MO_GD_PC_HI,
+                                 SecondOpcode, LoongArchII::MO_GOT_PC_LO);
+}
+
 } // end namespace
 
 INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "LoongArch-prera-expand-pseudo",
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -132,9 +132,14 @@
 
   template <class NodeTy>
   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
+  SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
+                           unsigned Opc) const;
+  SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
+                            unsigned Opc) const;
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -62,6 +62,8 @@
                       ISD::JumpTable},
                      GRLenVT, Custom);
 
+  setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom);
+
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
@@ -182,6 +184,8 @@
     return lowerEH_DWARF_CFA(Op, DAG);
   case ISD::GlobalAddress:
     return lowerGlobalAddress(Op, DAG);
+  case ISD::GlobalTLSAddress:
+    return lowerGlobalTLSAddress(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN:
     return lowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::BlockAddress:
@@ -342,6 +346,91 @@
   return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
 }
 
+/// Lower a TLS symbol for the static models: emit \p Opc on the symbol to get
+/// its offset, then add the thread pointer register (R2).
+SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
+                                                  SelectionDAG &DAG,
+                                                  unsigned Opc) const {
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+  MVT GRLenVT = Subtarget.getGRLenVT();
+
+  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
+  SDValue Offset = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
+
+  // Add the thread pointer.
+  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
+                     DAG.getRegister(LoongArch::R2, GRLenVT));
+}
+
+/// Lower a TLS symbol for the dynamic models: emit \p Opc on the symbol and
+/// pass the result as the only argument of a call to __tls_get_addr.
+SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
+                                                   SelectionDAG &DAG,
+                                                   unsigned Opc) const {
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
+
+  // Use a PC-relative addressing mode to access the dynamic GOT address.
+  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
+  SDValue Load = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
+
+  // Prepare argument list to generate call.
+  ArgListTy Args;
+  ArgListEntry Entry;
+  Entry.Node = Load;
+  Entry.Ty = CallTy;
+  Args.push_back(Entry);
+
+  // Setup call to __tls_get_addr.
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(DL)
+      .setChain(DAG.getEntryNode())
+      .setLibCallee(CallingConv::C, CallTy,
+                    DAG.getExternalSymbol("__tls_get_addr", Ty),
+                    std::move(Args));
+
+  return LowerCallTo(CLI).first;
+}
+
+/// Custom lowering for ISD::GlobalTLSAddress; dispatches on the TLS model the
+/// target machine reports for the referenced global.
+SDValue
+LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
+  assert(N->getOffset() == 0 && "unexpected offset in global node");
+
+  SDValue Addr;
+  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
+
+  switch (Model) {
+  case TLSModel::GeneralDynamic:
+    // In this model, application code calls the dynamic linker function
+    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
+    // runtime.
+    Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_GD);
+    break;
+  case TLSModel::LocalDynamic:
+    // Same as GeneralDynamic, except for assembly modifiers and relocation
+    // records.
+    Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LD);
+    break;
+  case TLSModel::InitialExec:
+    // This model uses the GOT to resolve TLS offsets.
+    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_IE);
+    break;
+  case TLSModel::LocalExec:
+    // This model is used when static linking as the TLS offsets are resolved
+    // during program linking.
+    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
+    break;
+  }
+
+  return Addr;
+}
+
 SDValue LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                          SelectionDAG &DAG) const {
   unsigned IntNo = Op.getConstantOperandVal(0);
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -925,6 +925,18 @@
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
 def PseudoLA_GOT : Pseudo<(outs GPR:$dst), (ins grlenimm:$src), []>;
 
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def PseudoLA_TLS_LE : Pseudo<(outs GPR:$dst), (ins grlenimm:$src), []>;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins grlenimm:$src), []>;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins grlenimm:$src), []>;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins grlenimm:$src), []>;
+
 /// BSTRINS and BSTRPICK
 
 let Predicates = [IsLA32] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
@@ -53,6 +53,24 @@
   case LoongArchII::MO_GOT_PC_LO:
     Kind = LoongArchMCExpr::VK_LoongArch_GOT_LO12;
     break;
+  case LoongArchII::MO_LE_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20;
+    break;
+  case LoongArchII::MO_LE_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12;
+    break;
+  case LoongArchII::MO_IE_PC_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_HI20;
+    break;
+  case LoongArchII::MO_IE_PC_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12;
+    break;
+  case LoongArchII::MO_LD_PC_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_LD_PC_HI20;
+    break;
+  case LoongArchII::MO_GD_PC_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20;
+    break;
   // TODO: Handle more target-flags.
   }
 
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -33,6 +33,12 @@
   MO_PCREL_LO,
   MO_GOT_PC_HI,
   MO_GOT_PC_LO,
+  MO_LE_HI,
+  MO_LE_LO,
+  MO_IE_PC_HI,
+  MO_IE_PC_LO,
+  MO_LD_PC_HI,
+  MO_GD_PC_HI,
   // TODO: Add more flags.
 };
 } // end namespace LoongArchII
diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/tls-models.ll
@@ -0,0 +1,166 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA32PIC
+; RUN: llc --mtriple=loongarch64 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64PIC
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32NOPIC
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64NOPIC
+
+;; Check that TLS symbols are lowered correctly based on the specified
+;; model. Make sure they're external to avoid them all being optimised to Local
+;; Exec for the executable.
+
+@unspecified = external thread_local global i32
+@ld = external thread_local(localdynamic) global i32
+@ie = external thread_local(initialexec) global i32
+@le = external thread_local(localexec) global i32
+
+;; No model specified (global dynamic)
+
+define ptr @f1() nounwind {
+; LA32PIC-LABEL: f1:
+; LA32PIC:       # %bb.0: # %entry
+; LA32PIC-NEXT:    addi.w $sp, $sp, -16
+; LA32PIC-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32PIC-NEXT:    pcalau12i $a0, %gd_pc_hi20(unspecified)
+; LA32PIC-NEXT:    addi.w $a0, $a0, %got_lo12(unspecified)
+; LA32PIC-NEXT:    bl %plt(__tls_get_addr)
+; LA32PIC-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32PIC-NEXT:    addi.w $sp, $sp, 16
+; LA32PIC-NEXT:    ret
+;
+; LA64PIC-LABEL: f1:
+; LA64PIC:       # %bb.0: # %entry
+; LA64PIC-NEXT:    addi.d $sp, $sp, -16
+; LA64PIC-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64PIC-NEXT:    pcalau12i $a0, %gd_pc_hi20(unspecified)
+; LA64PIC-NEXT:    addi.d $a0, $a0, %got_lo12(unspecified)
+; LA64PIC-NEXT:    bl %plt(__tls_get_addr)
+; LA64PIC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64PIC-NEXT:    addi.d $sp, $sp, 16
+; LA64PIC-NEXT:    ret
+;
+; LA32NOPIC-LABEL: f1:
+; LA32NOPIC:       # %bb.0: # %entry
+; LA32NOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(unspecified)
+; LA32NOPIC-NEXT:    ld.w $a0, $a0, %ie_pc_lo12(unspecified)
+; LA32NOPIC-NEXT:    add.w $a0, $a0, $tp
+; LA32NOPIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: f1:
+; LA64NOPIC:       # %bb.0: # %entry
+; LA64NOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(unspecified)
+; LA64NOPIC-NEXT:    ld.d $a0, $a0, %ie_pc_lo12(unspecified)
+; LA64NOPIC-NEXT:    add.d $a0, $a0, $tp
+; LA64NOPIC-NEXT:    ret
+entry:
+  ret ptr @unspecified
+}
+
+;; localdynamic specified
+
+define ptr @f2() nounwind {
+; LA32PIC-LABEL: f2:
+; LA32PIC:       # %bb.0: # %entry
+; LA32PIC-NEXT:    addi.w $sp, $sp, -16
+; LA32PIC-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32PIC-NEXT:    pcalau12i $a0, %ld_pc_hi20(ld)
+; LA32PIC-NEXT:    addi.w $a0, $a0, %got_lo12(ld)
+; LA32PIC-NEXT:    bl %plt(__tls_get_addr)
+; LA32PIC-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32PIC-NEXT:    addi.w $sp, $sp, 16
+; LA32PIC-NEXT:    ret
+;
+; LA64PIC-LABEL: f2:
+; LA64PIC:       # %bb.0: # %entry
+; LA64PIC-NEXT:    addi.d $sp, $sp, -16
+; LA64PIC-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64PIC-NEXT:    pcalau12i $a0, %ld_pc_hi20(ld)
+; LA64PIC-NEXT:    addi.d $a0, $a0, %got_lo12(ld)
+; LA64PIC-NEXT:    bl %plt(__tls_get_addr)
+; LA64PIC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64PIC-NEXT:    addi.d $sp, $sp, 16
+; LA64PIC-NEXT:    ret
+;
+; LA32NOPIC-LABEL: f2:
+; LA32NOPIC:       # %bb.0: # %entry
+; LA32NOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ld)
+; LA32NOPIC-NEXT:    ld.w $a0, $a0, %ie_pc_lo12(ld)
+; LA32NOPIC-NEXT:    add.w $a0, $a0, $tp
+; LA32NOPIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: f2:
+; LA64NOPIC:       # %bb.0: # %entry
+; LA64NOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ld)
+; LA64NOPIC-NEXT:    ld.d $a0, $a0, %ie_pc_lo12(ld)
+; LA64NOPIC-NEXT:    add.d $a0, $a0, $tp
+; LA64NOPIC-NEXT:    ret
+entry:
+  ret ptr @ld
+}
+
+;; initialexec specified
+
+define ptr @f3() nounwind {
+; LA32PIC-LABEL: f3:
+; LA32PIC:       # %bb.0: # %entry
+; LA32PIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
+; LA32PIC-NEXT:    ld.w $a0, $a0, %ie_pc_lo12(ie)
+; LA32PIC-NEXT:    add.w $a0, $a0, $tp
+; LA32PIC-NEXT:    ret
+;
+; LA64PIC-LABEL: f3:
+; LA64PIC:       # %bb.0: # %entry
+; LA64PIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
+; LA64PIC-NEXT:    ld.d $a0, $a0, %ie_pc_lo12(ie)
+; LA64PIC-NEXT:    add.d $a0, $a0, $tp
+; LA64PIC-NEXT:    ret
+;
+; LA32NOPIC-LABEL: f3:
+; LA32NOPIC:       # %bb.0: # %entry
+; LA32NOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
+; LA32NOPIC-NEXT:    ld.w $a0, $a0, %ie_pc_lo12(ie)
+; LA32NOPIC-NEXT:    add.w $a0, $a0, $tp
+; LA32NOPIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: f3:
+; LA64NOPIC:       # %bb.0: # %entry
+; LA64NOPIC-NEXT:    pcalau12i $a0, %ie_pc_hi20(ie)
+; LA64NOPIC-NEXT:    ld.d $a0, $a0, %ie_pc_lo12(ie)
+; LA64NOPIC-NEXT:    add.d $a0, $a0, $tp
+; LA64NOPIC-NEXT:    ret
+entry:
+  ret ptr @ie
+}
+
+;; localexec specified
+
+define ptr @f4() nounwind {
+; LA32PIC-LABEL: f4:
+; LA32PIC:       # %bb.0: # %entry
+; LA32PIC-NEXT:    lu12i.w $a0, %le_hi20(le)
+; LA32PIC-NEXT:    ori $a0, $a0, %le_lo12(le)
+; LA32PIC-NEXT:    add.w $a0, $a0, $tp
+; LA32PIC-NEXT:    ret
+;
+; LA64PIC-LABEL: f4:
+; LA64PIC:       # %bb.0: # %entry
+; LA64PIC-NEXT:    lu12i.w $a0, %le_hi20(le)
+; LA64PIC-NEXT:    ori $a0, $a0, %le_lo12(le)
+; LA64PIC-NEXT:    add.d $a0, $a0, $tp
+; LA64PIC-NEXT:    ret
+;
+; LA32NOPIC-LABEL: f4:
+; LA32NOPIC:       # %bb.0: # %entry
+; LA32NOPIC-NEXT:    lu12i.w $a0, %le_hi20(le)
+; LA32NOPIC-NEXT:    ori $a0, $a0, %le_lo12(le)
+; LA32NOPIC-NEXT:    add.w $a0, $a0, $tp
+; LA32NOPIC-NEXT:    ret
+;
+; LA64NOPIC-LABEL: f4:
+; LA64NOPIC:       # %bb.0: # %entry
+; LA64NOPIC-NEXT:    lu12i.w $a0, %le_hi20(le)
+; LA64NOPIC-NEXT:    ori $a0, $a0, %le_lo12(le)
+; LA64NOPIC-NEXT:    add.d $a0, $a0, $tp
+; LA64NOPIC-NEXT:    ret
+entry:
+  ret ptr @le
+}