diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
--- a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
@@ -42,6 +42,12 @@
   fixup_ve_plt_hi32,
   fixup_ve_plt_lo32,
 
+  /// fixups for Thread Local Storage
+  fixup_ve_tls_gd_hi32,
+  fixup_ve_tls_gd_lo32,
+  fixup_ve_tpoff_hi32,
+  fixup_ve_tpoff_lo32,
+
   // Marker
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
@@ -34,6 +34,10 @@
     VK_VE_GOTOFF_LO32,
     VK_VE_PLT_HI32,
     VK_VE_PLT_LO32,
+    VK_VE_TLS_GD_HI32,
+    VK_VE_TLS_GD_LO32,
+    VK_VE_TPOFF_HI32,
+    VK_VE_TPOFF_LO32,
   };
 
 private:
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
@@ -54,6 +54,10 @@
   case VK_VE_GOTOFF_LO32:
   case VK_VE_PLT_HI32:
   case VK_VE_PLT_LO32:
+  case VK_VE_TLS_GD_HI32:
+  case VK_VE_TLS_GD_LO32:
+  case VK_VE_TPOFF_HI32:
+  case VK_VE_TPOFF_LO32:
     return false; // OS << "@("; break;
   }
   return true;
@@ -90,9 +94,21 @@
   case VK_VE_PLT_HI32:
     OS << "@plt_hi";
     break;
+  case VK_VE_TLS_GD_HI32:
+    OS << "@tls_gd_hi";
+    break;
+  case VK_VE_TLS_GD_LO32:
+    OS << "@tls_gd_lo";
+    break;
   case VK_VE_PLT_LO32:
     OS << "@plt_lo";
     break;
+  case VK_VE_TPOFF_HI32:
+    OS << "@tpoff_hi";
+    break;
+  case VK_VE_TPOFF_LO32:
+    OS << "@tpoff_lo";
+    break;
   }
 }
 
@@ -108,6 +124,10 @@
       .Case("gotoff_lo", VK_VE_GOTOFF_LO32)
       .Case("plt_hi", VK_VE_PLT_HI32)
       .Case("plt_lo", VK_VE_PLT_LO32)
+      .Case("tls_gd_hi", VK_VE_TLS_GD_HI32)
+      .Case("tls_gd_lo", VK_VE_TLS_GD_LO32)
+      .Case("tpoff_hi", VK_VE_TPOFF_HI32)
+      .Case("tpoff_lo", VK_VE_TPOFF_LO32)
       .Default(VK_VE_None);
 }
 
@@ -135,6 +155,14 @@
     return VE::fixup_ve_plt_hi32;
   case VK_VE_PLT_LO32:
     return VE::fixup_ve_plt_lo32;
+  case VK_VE_TLS_GD_HI32:
+    return VE::fixup_ve_tls_gd_hi32;
+  case VK_VE_TLS_GD_LO32:
+    return VE::fixup_ve_tls_gd_lo32;
+  case VK_VE_TPOFF_HI32:
+    return VE::fixup_ve_tpoff_hi32;
+  case VK_VE_TPOFF_LO32:
+    return VE::fixup_ve_tpoff_lo32;
   }
 }
 
@@ -144,10 +172,40 @@
   return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
 }
 
+/// Recursively walk \p Expr and mark every symbol it references as STT_TLS.
+/// Nested target expressions are not expected and are treated as unreachable.
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+  switch (Expr->getKind()) {
+  case MCExpr::Target:
+    llvm_unreachable("Can't handle nested target expr!");
+    break;
+
+  case MCExpr::Constant:
+    break;
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+    fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+    fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+    break;
+  }
+
+  case MCExpr::SymbolRef: {
+    const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+    cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS);
+    break;
+  }
+
+  case MCExpr::Unary:
+    fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+    break;
+  }
+}
+
 void VEMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
   Streamer.visitUsedExpr(*getSubExpr());
 }
 
 void VEMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
-  llvm_unreachable("TODO implement");
+  fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
 }
diff --git a/llvm/lib/Target/VE/VEAsmPrinter.cpp b/llvm/lib/Target/VE/VEAsmPrinter.cpp
--- a/llvm/lib/Target/VE/VEAsmPrinter.cpp
+++ b/llvm/lib/Target/VE/VEAsmPrinter.cpp
@@ -51,6 +51,8 @@
                                  const MCSubtargetInfo &STI);
   void lowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI,
                                     const MCSubtargetInfo &STI);
+  void lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
+                                     const MCSubtargetInfo &STI);
 
   void emitInstruction(const MachineInstr *MI) override;
 
@@ -82,6 +84,16 @@
   OutStreamer.emitInstruction(SICInst, STI);
 }
 
+/// Emit a VE::BSIC instruction whose operands are \p R1 followed by \p R2.
+static void emitBSIC(MCStreamer &OutStreamer, MCOperand &R1, MCOperand &R2,
+                     const MCSubtargetInfo &STI) {
+  MCInst BSICInst;
+  BSICInst.setOpcode(VE::BSIC);
+  BSICInst.addOperand(R1);
+  BSICInst.addOperand(R2);
+  OutStreamer.emitInstruction(BSICInst, STI);
+}
+
 static void emitLEAzzi(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD,
                        const MCSubtargetInfo &STI) {
   MCInst LEAInst;
@@ -241,6 +253,66 @@
   emitLEASLrri(*OutStreamer, MCRegOP, RegPLT, hiImm, MCRegOP, STI);
 }
 
+/// Expand the GETTLSADDR pseudo into real instructions: form the
+/// sym@tls_gd_{lo,hi} address in %s0, form the __tls_get_addr@plt_{lo,hi}
+/// address in %s12, and branch to it with BSIC.
+void VEAsmPrinter::lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
+                                                 const MCSubtargetInfo &STI) {
+  const MachineOperand &Addr = MI->getOperand(0);
+  MCSymbol *AddrSym = nullptr;
+
+  switch (Addr.getType()) {
+  default:
+    llvm_unreachable("<unknown operand type>");
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    report_fatal_error("MBB is not supported yet");
+    return;
+  case MachineOperand::MO_ConstantPoolIndex:
+    report_fatal_error("ConstantPool is not supported yet");
+    return;
+  case MachineOperand::MO_ExternalSymbol:
+    AddrSym = GetExternalSymbolSymbol(Addr.getSymbolName());
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    AddrSym = getSymbol(Addr.getGlobal());
+    break;
+  }
+
+  MCOperand RegLR = MCOperand::createReg(VE::SX10);  // LR
+  MCOperand RegS0 = MCOperand::createReg(VE::SX0);   // S0
+  MCOperand RegS12 = MCOperand::createReg(VE::SX12); // S12
+  MCSymbol *GetTLSLabel = OutContext.getOrCreateSymbol(Twine("__tls_get_addr"));
+
+  // lea %s0, sym@tls_gd_lo(-24)
+  // and %s0, %s0, (32)0
+  // sic %lr
+  // lea.sl %s0, sym@tls_gd_hi(%lr, %s0)
+  // lea %s12, __tls_get_addr@plt_lo(8)
+  // and %s12, %s12, (32)0
+  // lea.sl %s12, __tls_get_addr@plt_hi(%lr, %s12)
+  // bsic %lr, (, %s12)
+  MCOperand cim24 = MCOperand::createImm(-24);
+  MCOperand loImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_LO32, AddrSym, OutContext);
+  emitLEAzii(*OutStreamer, cim24, loImm, RegS0, STI);
+  MCOperand ci32 = MCOperand::createImm(32);
+  emitANDrm0(*OutStreamer, RegS0, ci32, RegS0, STI);
+  emitSIC(*OutStreamer, RegLR, STI);
+  MCOperand hiImm =
+      createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_HI32, AddrSym, OutContext);
+  emitLEASLrri(*OutStreamer, RegS0, RegLR, hiImm, RegS0, STI);
+  MCOperand ci8 = MCOperand::createImm(8);
+  MCOperand loImm2 =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, GetTLSLabel, OutContext);
+  emitLEAzii(*OutStreamer, ci8, loImm2, RegS12, STI);
+  emitANDrm0(*OutStreamer, RegS12, ci32, RegS12, STI);
+  MCOperand hiImm2 =
+      createGOTRelExprOp(VEMCExpr::VK_VE_PLT_HI32, GetTLSLabel, OutContext);
+  emitLEASLrri(*OutStreamer, RegS12, RegLR, hiImm2, RegS12, STI);
+  emitBSIC(*OutStreamer, RegLR, RegS12, STI);
+}
+
 void VEAsmPrinter::emitInstruction(const MachineInstr *MI) {
 
   switch (MI->getOpcode()) {
@@ -255,6 +327,9 @@
   case VE::GETFUNPLT:
     lowerGETFunPLTAndEmitMCInsts(MI, getSubtargetInfo());
     return;
+  case VE::GETTLSADDR:
+    lowerGETTLSAddrAndEmitMCInsts(MI, getSubtargetInfo());
+    return;
   }
 
   MachineBasicBlock::const_instr_iterator I = MI->getIterator();
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -28,6 +28,8 @@
   Lo, // Hi/Lo operations, typically on a global address.
 
   GETFUNPLT, // load function address through %plt insturction
+  GETTLSADDR, // load address for TLS access
+
   CALL,            // A call instruction.
   RET_FLAG,        // Return with a flag operand.
   GLOBAL_BASE_REG, // Global base reg for PIC.
@@ -77,6 +79,8 @@
   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
   /// } Custom Lower
 
   SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -572,6 +572,7 @@
   MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
   setOperationAction(ISD::BlockAddress, PtrVT, Custom);
   setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
+  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
 
   /// VAARG handling {
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -640,6 +641,7 @@
     TARGET_NODE_CASE(Lo)
     TARGET_NODE_CASE(Hi)
     TARGET_NODE_CASE(GETFUNPLT)
+    TARGET_NODE_CASE(GETTLSADDR)
     TARGET_NODE_CASE(CALL)
     TARGET_NODE_CASE(RET_FLAG)
     TARGET_NODE_CASE(GLOBAL_BASE_REG)
@@ -748,6 +750,58 @@
   return makeAddress(Op, DAG);
 }
 
+/// Lower \p Op using the general dynamic TLS model: emit a VEISD::GETTLSADDR
+/// node bracketed by a call sequence and return the value copied from SX0.
+SDValue
+VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+
+  // Generate the following code:
+  //   t1: ch,glue = callseq_start t0, 64, 0
+  //   t2: ch,glue = VEISD::GETTLSADDR t1, label, regmask, t1:1
+  //   t3: ch,glue = callseq_end t2, 64, 0, t2:1
+  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
+  SDValue Label = withTargetFlags(Op, 0, DAG);
+  EVT PtrVT = Op.getValueType();
+
+  // Lowering the machine isd will make sure everything is in the right
+  // location.
+  SDValue Chain = DAG.getEntryNode();
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
+      DAG.getMachineFunction(), CallingConv::C);
+  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl);
+  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
+  Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args);
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true),
+                             DAG.getIntPtrConstant(0, dl, true),
+                             Chain.getValue(1), dl);
+  Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1));
+
+  // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
+  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+  MFI.setHasCalls(true);
+
+  // Also generate code to prepare a GOT register if it is PIC.
+  if (isPositionIndependent()) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
+  }
+
+  return Chain;
+}
+
+SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  // The current implementation of nld (2.26) doesn't allow local exec model
+  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
+  // generate the general dynamic model code sequence.
+  //
+  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
+  return LowerToTLSGeneralDynamicModel(Op, DAG);
+}
+
 SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
@@ -816,6 +870,8 @@
     return LowerBlockAddress(Op, DAG);
   case ISD::GlobalAddress:
     return LowerGlobalAddress(Op, DAG);
+  case ISD::GlobalTLSAddress:
+    return LowerGlobalTLSAddress(Op, DAG);
   case ISD::VASTART:
     return LowerVASTART(Op, DAG);
   case ISD::VAARG:
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -216,6 +216,12 @@
 // GETFUNPLT for PIC
 def GetFunPLT : SDNode<"VEISD::GETFUNPLT", SDTIntUnaryOp>;
 
+// GETTLSADDR for TLS
+def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
+                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                         SDNPVariadic]>;
+
+
 
 //===----------------------------------------------------------------------===//
 // VE Flag Conditions
@@ -1027,6 +1033,11 @@
     "b.l (,%lr)",
     [(retflag)]>;
 
+// Branch and Save IC
+
+let cx = 0, cy = 0, cz = 1, hasSideEffects = 0 /* , Uses = [IC] */ in
+def BSIC : RM<0x08, (outs), (ins I64:$sx, I64:$sz), "bsic $sx, (, ${sz})">;
+
 // Branch instruction
 let cx = 0, cx2 = 0, bpf = 0 /* NONE */ in
 defm BCRL : BCRm<"br${cf}.l", "br.l", 0x18, I64, i64, simm7Op64, uimm6Op64>;
@@ -1197,6 +1208,13 @@
           (LEASLrzi (ANDrm0 (LEAzzi tglobaladdr:$in2), 32),
                     (tglobaladdr:$in1))>;
 
+// GlobalTLS address calculation and its optimization
+def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzzi tglobaltlsaddr:$in)>;
+def : Pat<(VElo tglobaltlsaddr:$in), (ANDrm0 (LEAzzi tglobaltlsaddr:$in), 32)>;
+def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)),
+          (LEASLrzi (ANDrm0 (LEAzzi tglobaltlsaddr:$in2), 32),
+                    (tglobaltlsaddr:$in1))>;
+
 // Address calculation and its optimization
 def : Pat<(VEhi texternalsym:$in), (LEASLzzi texternalsym:$in)>;
 def : Pat<(VElo texternalsym:$in), (ANDrm0 (LEAzzi texternalsym:$in), 32)>;
@@ -1247,6 +1265,14 @@
 def : Pat<(GetFunPLT texternalsym:$dst),
           (GETFUNPLT texternalsym:$dst)>;
 
+// GETTLSADDR for TLS
+let Defs = [SX0, SX10, SX12], hasSideEffects = 0 in
+def GETTLSADDR : Pseudo<(outs), (ins i64imm:$addr),
+                        "# GETTLSADDR $addr",
+                        [(GetTLSAddr tglobaltlsaddr:$addr)] >;
+
+def : Pat<(GetTLSAddr tglobaltlsaddr:$dst),
+          (GETTLSADDR tglobaltlsaddr:$dst)>;
 
 let Defs = [SX11], Uses = [SX11], hasSideEffects = 0 in {
 def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt, i64imm:$amt2),
diff --git a/llvm/test/CodeGen/VE/tls.ll b/llvm/test/CodeGen/VE/tls.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/tls.ll
@@ -0,0 +1,381 @@
+; FIXME: even under non-pic mode, llvm needs to generate pic code since nld
+;        doesn't work with non-pic code.  Therefore, we test pic codes for
+;        both cases here.
+;      llc -mtriple ve < %s | FileCheck %s -check-prefix=LOCAL
+; RUN: llc -mtriple ve < %s | FileCheck %s -check-prefix=GENDYN
+; RUN: llc -mtriple ve -relocation-model=pic < %s | FileCheck %s -check-prefix=GENDYNPIC
+
+@x = external thread_local global i32, align 4
+@y = internal thread_local global i32 0, align 4
+
+; Function Attrs: norecurse nounwind readnone
+define nonnull i32* @get_global() {
+; GENDYN-LABEL: get_global:
+; GENDYN: # %bb.0: # %entry
+; GENDYN-NEXT: st %s9, (,%s11)
+; GENDYN-NEXT: st %s10, 8(,%s11)
+; GENDYN-NEXT: st %s15, 24(,%s11)
+; GENDYN-NEXT: st %s16, 32(,%s11)
+; GENDYN-NEXT: or %s9, 0, %s11
+; GENDYN-NEXT: lea %s13, -240
+; GENDYN-NEXT: and %s13, %s13, (32)0
+; GENDYN-NEXT: lea.sl %s11, -1(%s11, %s13)
+; GENDYN-NEXT: brge.l %s11, %s8, .LBB0_2
+; GENDYN-NEXT: # %bb.1: # %entry
+; GENDYN-NEXT: ld %s61, 24(,%s14)
+; GENDYN-NEXT: or %s62, 0, %s0
+; GENDYN-NEXT: lea %s63, 315
+; GENDYN-NEXT: shm.l %s63, (%s61)
+; GENDYN-NEXT: shm.l %s8, 8(%s61)
+; GENDYN-NEXT: shm.l %s11, 16(%s61)
+; GENDYN-NEXT: monc
+; GENDYN-NEXT: or %s0, 0,
%s62 +; GENDYN-NEXT: .LBB0_2: # %entry +; GENDYN-NEXT: lea %s0, x@tls_gd_lo(-24) +; GENDYN-NEXT: and %s0, %s0, (32)0 +; GENDYN-NEXT: sic %s10 +; GENDYN-NEXT: lea.sl %s0, x@tls_gd_hi(%s10, %s0) +; GENDYN-NEXT: lea %s12, __tls_get_addr@plt_lo(8) +; GENDYN-NEXT: and %s12, %s12, (32)0 +; GENDYN-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12) +; GENDYN-NEXT: bsic %s10, (, %s12) +; GENDYN-NEXT: or %s11, 0, %s9 +; GENDYN-NEXT: ld %s16, 32(,%s11) +; GENDYN-NEXT: ld %s15, 24(,%s11) +; GENDYN-NEXT: ld %s10, 8(,%s11) +; GENDYN-NEXT: ld %s9, (,%s11) +; GENDYN-NEXT: b.l (,%lr) +; +; GENDYNPIC-LABEL: get_global: +; GENDYNPIC: # %bb.0: # %entry +; GENDYNPIC-NEXT: st %s9, (,%s11) +; GENDYNPIC-NEXT: st %s10, 8(,%s11) +; GENDYNPIC-NEXT: st %s15, 24(,%s11) +; GENDYNPIC-NEXT: st %s16, 32(,%s11) +; GENDYNPIC-NEXT: or %s9, 0, %s11 +; GENDYNPIC-NEXT: lea %s13, -240 +; GENDYNPIC-NEXT: and %s13, %s13, (32)0 +; GENDYNPIC-NEXT: lea.sl %s11, -1(%s11, %s13) +; GENDYNPIC-NEXT: brge.l %s11, %s8, .LBB0_2 +; GENDYNPIC-NEXT: # %bb.1: # %entry +; GENDYNPIC-NEXT: ld %s61, 24(,%s14) +; GENDYNPIC-NEXT: or %s62, 0, %s0 +; GENDYNPIC-NEXT: lea %s63, 315 +; GENDYNPIC-NEXT: shm.l %s63, (%s61) +; GENDYNPIC-NEXT: shm.l %s8, 8(%s61) +; GENDYNPIC-NEXT: shm.l %s11, 16(%s61) +; GENDYNPIC-NEXT: monc +; GENDYNPIC-NEXT: or %s0, 0, %s62 +; GENDYNPIC-NEXT: .LBB0_2: # %entry +; GENDYNPIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) +; GENDYNPIC-NEXT: and %s15, %s15, (32)0 +; GENDYNPIC-NEXT: sic %s16 +; GENDYNPIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; GENDYNPIC-NEXT: lea %s0, x@tls_gd_lo(-24) +; GENDYNPIC-NEXT: and %s0, %s0, (32)0 +; GENDYNPIC-NEXT: sic %s10 +; GENDYNPIC-NEXT: lea.sl %s0, x@tls_gd_hi(%s10, %s0) +; GENDYNPIC-NEXT: lea %s12, __tls_get_addr@plt_lo(8) +; GENDYNPIC-NEXT: and %s12, %s12, (32)0 +; GENDYNPIC-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12) +; GENDYNPIC-NEXT: bsic %s10, (, %s12) +; GENDYNPIC-NEXT: or %s11, 0, %s9 +; GENDYNPIC-NEXT: ld %s16, 32(,%s11) +; 
GENDYNPIC-NEXT: ld %s15, 24(,%s11) +; GENDYNPIC-NEXT: ld %s10, 8(,%s11) +; GENDYNPIC-NEXT: ld %s9, (,%s11) +; GENDYNPIC-NEXT: b.l (,%lr) +; LOCAL-LABEL: get_global: +; LOCAL: .LBB{{[0-9]+}}_2: +; LOCAL-NEXT: lea %s34, x@tpoff_lo +; LOCAL-NEXT: and %s34, %s34, (32)0 +; LOCAL-NEXT: lea.sl %s34, x@tpoff_hi(%s34) +; LOCAL-NEXT: adds.l %s0, %s14, %s34 +; LOCAL-NEXT: or %s11, 0, %s9 +entry: + ret i32* @x +} + +; Function Attrs: norecurse nounwind readnone +define nonnull i32* @get_local() { +; GENDYN-LABEL: get_local: +; GENDYN: # %bb.0: # %entry +; GENDYN-NEXT: st %s9, (,%s11) +; GENDYN-NEXT: st %s10, 8(,%s11) +; GENDYN-NEXT: st %s15, 24(,%s11) +; GENDYN-NEXT: st %s16, 32(,%s11) +; GENDYN-NEXT: or %s9, 0, %s11 +; GENDYN-NEXT: lea %s13, -240 +; GENDYN-NEXT: and %s13, %s13, (32)0 +; GENDYN-NEXT: lea.sl %s11, -1(%s11, %s13) +; GENDYN-NEXT: brge.l %s11, %s8, .LBB1_2 +; GENDYN-NEXT: # %bb.1: # %entry +; GENDYN-NEXT: ld %s61, 24(,%s14) +; GENDYN-NEXT: or %s62, 0, %s0 +; GENDYN-NEXT: lea %s63, 315 +; GENDYN-NEXT: shm.l %s63, (%s61) +; GENDYN-NEXT: shm.l %s8, 8(%s61) +; GENDYN-NEXT: shm.l %s11, 16(%s61) +; GENDYN-NEXT: monc +; GENDYN-NEXT: or %s0, 0, %s62 +; GENDYN-NEXT: .LBB1_2: # %entry +; GENDYN-NEXT: lea %s0, y@tls_gd_lo(-24) +; GENDYN-NEXT: and %s0, %s0, (32)0 +; GENDYN-NEXT: sic %s10 +; GENDYN-NEXT: lea.sl %s0, y@tls_gd_hi(%s10, %s0) +; GENDYN-NEXT: lea %s12, __tls_get_addr@plt_lo(8) +; GENDYN-NEXT: and %s12, %s12, (32)0 +; GENDYN-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12) +; GENDYN-NEXT: bsic %s10, (, %s12) +; GENDYN-NEXT: or %s11, 0, %s9 +; GENDYN-NEXT: ld %s16, 32(,%s11) +; GENDYN-NEXT: ld %s15, 24(,%s11) +; GENDYN-NEXT: ld %s10, 8(,%s11) +; GENDYN-NEXT: ld %s9, (,%s11) +; GENDYN-NEXT: b.l (,%lr) +; +; GENDYNPIC-LABEL: get_local: +; GENDYNPIC: # %bb.0: # %entry +; GENDYNPIC-NEXT: st %s9, (,%s11) +; GENDYNPIC-NEXT: st %s10, 8(,%s11) +; GENDYNPIC-NEXT: st %s15, 24(,%s11) +; GENDYNPIC-NEXT: st %s16, 32(,%s11) +; GENDYNPIC-NEXT: or %s9, 0, %s11 +; GENDYNPIC-NEXT: 
lea %s13, -240 +; GENDYNPIC-NEXT: and %s13, %s13, (32)0 +; GENDYNPIC-NEXT: lea.sl %s11, -1(%s11, %s13) +; GENDYNPIC-NEXT: brge.l %s11, %s8, .LBB1_2 +; GENDYNPIC-NEXT: # %bb.1: # %entry +; GENDYNPIC-NEXT: ld %s61, 24(,%s14) +; GENDYNPIC-NEXT: or %s62, 0, %s0 +; GENDYNPIC-NEXT: lea %s63, 315 +; GENDYNPIC-NEXT: shm.l %s63, (%s61) +; GENDYNPIC-NEXT: shm.l %s8, 8(%s61) +; GENDYNPIC-NEXT: shm.l %s11, 16(%s61) +; GENDYNPIC-NEXT: monc +; GENDYNPIC-NEXT: or %s0, 0, %s62 +; GENDYNPIC-NEXT: .LBB1_2: # %entry +; GENDYNPIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) +; GENDYNPIC-NEXT: and %s15, %s15, (32)0 +; GENDYNPIC-NEXT: sic %s16 +; GENDYNPIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; GENDYNPIC-NEXT: lea %s0, y@tls_gd_lo(-24) +; GENDYNPIC-NEXT: and %s0, %s0, (32)0 +; GENDYNPIC-NEXT: sic %s10 +; GENDYNPIC-NEXT: lea.sl %s0, y@tls_gd_hi(%s10, %s0) +; GENDYNPIC-NEXT: lea %s12, __tls_get_addr@plt_lo(8) +; GENDYNPIC-NEXT: and %s12, %s12, (32)0 +; GENDYNPIC-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12) +; GENDYNPIC-NEXT: bsic %s10, (, %s12) +; GENDYNPIC-NEXT: or %s11, 0, %s9 +; GENDYNPIC-NEXT: ld %s16, 32(,%s11) +; GENDYNPIC-NEXT: ld %s15, 24(,%s11) +; GENDYNPIC-NEXT: ld %s10, 8(,%s11) +; GENDYNPIC-NEXT: ld %s9, (,%s11) +; GENDYNPIC-NEXT: b.l (,%lr) +; LOCAL-LABEL: get_local: +; LOCAL: .LBB{{[0-9]+}}_2: +; LOCAL-NEXT: lea %s34, y@tpoff_lo +; LOCAL-NEXT: and %s34, %s34, (32)0 +; LOCAL-NEXT: lea.sl %s34, y@tpoff_hi(%s34) +; LOCAL-NEXT: adds.l %s0, %s14, %s34 +; LOCAL-NEXT: or %s11, 0, %s9 +entry: + ret i32* @y +} + +; Function Attrs: norecurse nounwind +define void @set_global(i32 %v) { +; GENDYN-LABEL: set_global: +; GENDYN: # %bb.0: # %entry +; GENDYN-NEXT: st %s9, (,%s11) +; GENDYN-NEXT: st %s10, 8(,%s11) +; GENDYN-NEXT: st %s15, 24(,%s11) +; GENDYN-NEXT: st %s16, 32(,%s11) +; GENDYN-NEXT: or %s9, 0, %s11 +; GENDYN-NEXT: lea %s13, -240 +; GENDYN-NEXT: and %s13, %s13, (32)0 +; GENDYN-NEXT: lea.sl %s11, -1(%s11, %s13) +; GENDYN-NEXT: brge.l %s11, %s8, 
.LBB2_2 +; GENDYN-NEXT: # %bb.1: # %entry +; GENDYN-NEXT: ld %s61, 24(,%s14) +; GENDYN-NEXT: or %s62, 0, %s0 +; GENDYN-NEXT: lea %s63, 315 +; GENDYN-NEXT: shm.l %s63, (%s61) +; GENDYN-NEXT: shm.l %s8, 8(%s61) +; GENDYN-NEXT: shm.l %s11, 16(%s61) +; GENDYN-NEXT: monc +; GENDYN-NEXT: or %s0, 0, %s62 +; GENDYN-NEXT: .LBB2_2: # %entry +; GENDYN-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill +; GENDYN-NEXT: or %s18, 0, %s0 +; GENDYN-NEXT: lea %s0, x@tls_gd_lo(-24) +; GENDYN-NEXT: and %s0, %s0, (32)0 +; GENDYN-NEXT: sic %s10 +; GENDYN-NEXT: lea.sl %s0, x@tls_gd_hi(%s10, %s0) +; GENDYN-NEXT: lea %s12, __tls_get_addr@plt_lo(8) +; GENDYN-NEXT: and %s12, %s12, (32)0 +; GENDYN-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12) +; GENDYN-NEXT: bsic %s10, (, %s12) +; GENDYN-NEXT: stl %s18, (,%s0) +; GENDYN-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload +; GENDYN-NEXT: or %s11, 0, %s9 +; GENDYN-NEXT: ld %s16, 32(,%s11) +; GENDYN-NEXT: ld %s15, 24(,%s11) +; GENDYN-NEXT: ld %s10, 8(,%s11) +; GENDYN-NEXT: ld %s9, (,%s11) +; GENDYN-NEXT: b.l (,%lr) +; +; GENDYNPIC-LABEL: set_global: +; GENDYNPIC: # %bb.0: # %entry +; GENDYNPIC-NEXT: st %s9, (,%s11) +; GENDYNPIC-NEXT: st %s10, 8(,%s11) +; GENDYNPIC-NEXT: st %s15, 24(,%s11) +; GENDYNPIC-NEXT: st %s16, 32(,%s11) +; GENDYNPIC-NEXT: or %s9, 0, %s11 +; GENDYNPIC-NEXT: lea %s13, -240 +; GENDYNPIC-NEXT: and %s13, %s13, (32)0 +; GENDYNPIC-NEXT: lea.sl %s11, -1(%s11, %s13) +; GENDYNPIC-NEXT: brge.l %s11, %s8, .LBB2_2 +; GENDYNPIC-NEXT: # %bb.1: # %entry +; GENDYNPIC-NEXT: ld %s61, 24(,%s14) +; GENDYNPIC-NEXT: or %s62, 0, %s0 +; GENDYNPIC-NEXT: lea %s63, 315 +; GENDYNPIC-NEXT: shm.l %s63, (%s61) +; GENDYNPIC-NEXT: shm.l %s8, 8(%s61) +; GENDYNPIC-NEXT: shm.l %s11, 16(%s61) +; GENDYNPIC-NEXT: monc +; GENDYNPIC-NEXT: or %s0, 0, %s62 +; GENDYNPIC-NEXT: .LBB2_2: # %entry +; GENDYNPIC-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill +; GENDYNPIC-NEXT: or %s18, 0, %s0 +; GENDYNPIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) +; GENDYNPIC-NEXT: and 
%s15, %s15, (32)0 +; GENDYNPIC-NEXT: sic %s16 +; GENDYNPIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; GENDYNPIC-NEXT: lea %s0, x@tls_gd_lo(-24) +; GENDYNPIC-NEXT: and %s0, %s0, (32)0 +; GENDYNPIC-NEXT: sic %s10 +; GENDYNPIC-NEXT: lea.sl %s0, x@tls_gd_hi(%s10, %s0) +; GENDYNPIC-NEXT: lea %s12, __tls_get_addr@plt_lo(8) +; GENDYNPIC-NEXT: and %s12, %s12, (32)0 +; GENDYNPIC-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12) +; GENDYNPIC-NEXT: bsic %s10, (, %s12) +; GENDYNPIC-NEXT: stl %s18, (,%s0) +; GENDYNPIC-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload +; GENDYNPIC-NEXT: or %s11, 0, %s9 +; GENDYNPIC-NEXT: ld %s16, 32(,%s11) +; GENDYNPIC-NEXT: ld %s15, 24(,%s11) +; GENDYNPIC-NEXT: ld %s10, 8(,%s11) +; GENDYNPIC-NEXT: ld %s9, (,%s11) +; GENDYNPIC-NEXT: b.l (,%lr) +; LOCAL-LABEL: set_global: +; LOCAL: .LBB{{[0-9]+}}_2: +; LOCAL-NEXT: lea %s34, x@tpoff_lo +; LOCAL-NEXT: and %s34, %s34, (32)0 +; LOCAL-NEXT: lea.sl %s34, x@tpoff_hi(%s34) +; LOCAL-NEXT: adds.l %s34, %s14, %s34 +; LOCAL-NEXT: stl %s0, (,%s34) +; LOCAL-NEXT: or %s11, 0, %s9 +entry: + store i32 %v, i32* @x, align 4 + ret void +} + +; Function Attrs: norecurse nounwind +define void @set_local(i32 %v) { +; GENDYN-LABEL: set_local: +; GENDYN: # %bb.0: # %entry +; GENDYN-NEXT: st %s9, (,%s11) +; GENDYN-NEXT: st %s10, 8(,%s11) +; GENDYN-NEXT: st %s15, 24(,%s11) +; GENDYN-NEXT: st %s16, 32(,%s11) +; GENDYN-NEXT: or %s9, 0, %s11 +; GENDYN-NEXT: lea %s13, -240 +; GENDYN-NEXT: and %s13, %s13, (32)0 +; GENDYN-NEXT: lea.sl %s11, -1(%s11, %s13) +; GENDYN-NEXT: brge.l %s11, %s8, .LBB3_2 +; GENDYN-NEXT: # %bb.1: # %entry +; GENDYN-NEXT: ld %s61, 24(,%s14) +; GENDYN-NEXT: or %s62, 0, %s0 +; GENDYN-NEXT: lea %s63, 315 +; GENDYN-NEXT: shm.l %s63, (%s61) +; GENDYN-NEXT: shm.l %s8, 8(%s61) +; GENDYN-NEXT: shm.l %s11, 16(%s61) +; GENDYN-NEXT: monc +; GENDYN-NEXT: or %s0, 0, %s62 +; GENDYN-NEXT: .LBB3_2: # %entry +; GENDYN-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill +; GENDYN-NEXT: or %s18, 0, %s0 +; 
GENDYN-NEXT: lea %s0, y@tls_gd_lo(-24) +; GENDYN-NEXT: and %s0, %s0, (32)0 +; GENDYN-NEXT: sic %s10 +; GENDYN-NEXT: lea.sl %s0, y@tls_gd_hi(%s10, %s0) +; GENDYN-NEXT: lea %s12, __tls_get_addr@plt_lo(8) +; GENDYN-NEXT: and %s12, %s12, (32)0 +; GENDYN-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12) +; GENDYN-NEXT: bsic %s10, (, %s12) +; GENDYN-NEXT: stl %s18, (,%s0) +; GENDYN-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload +; GENDYN-NEXT: or %s11, 0, %s9 +; GENDYN-NEXT: ld %s16, 32(,%s11) +; GENDYN-NEXT: ld %s15, 24(,%s11) +; GENDYN-NEXT: ld %s10, 8(,%s11) +; GENDYN-NEXT: ld %s9, (,%s11) +; GENDYN-NEXT: b.l (,%lr) +; +; GENDYNPIC-LABEL: set_local: +; GENDYNPIC: # %bb.0: # %entry +; GENDYNPIC-NEXT: st %s9, (,%s11) +; GENDYNPIC-NEXT: st %s10, 8(,%s11) +; GENDYNPIC-NEXT: st %s15, 24(,%s11) +; GENDYNPIC-NEXT: st %s16, 32(,%s11) +; GENDYNPIC-NEXT: or %s9, 0, %s11 +; GENDYNPIC-NEXT: lea %s13, -240 +; GENDYNPIC-NEXT: and %s13, %s13, (32)0 +; GENDYNPIC-NEXT: lea.sl %s11, -1(%s11, %s13) +; GENDYNPIC-NEXT: brge.l %s11, %s8, .LBB3_2 +; GENDYNPIC-NEXT: # %bb.1: # %entry +; GENDYNPIC-NEXT: ld %s61, 24(,%s14) +; GENDYNPIC-NEXT: or %s62, 0, %s0 +; GENDYNPIC-NEXT: lea %s63, 315 +; GENDYNPIC-NEXT: shm.l %s63, (%s61) +; GENDYNPIC-NEXT: shm.l %s8, 8(%s61) +; GENDYNPIC-NEXT: shm.l %s11, 16(%s61) +; GENDYNPIC-NEXT: monc +; GENDYNPIC-NEXT: or %s0, 0, %s62 +; GENDYNPIC-NEXT: .LBB3_2: # %entry +; GENDYNPIC-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill +; GENDYNPIC-NEXT: or %s18, 0, %s0 +; GENDYNPIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) +; GENDYNPIC-NEXT: and %s15, %s15, (32)0 +; GENDYNPIC-NEXT: sic %s16 +; GENDYNPIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; GENDYNPIC-NEXT: lea %s0, y@tls_gd_lo(-24) +; GENDYNPIC-NEXT: and %s0, %s0, (32)0 +; GENDYNPIC-NEXT: sic %s10 +; GENDYNPIC-NEXT: lea.sl %s0, y@tls_gd_hi(%s10, %s0) +; GENDYNPIC-NEXT: lea %s12, __tls_get_addr@plt_lo(8) +; GENDYNPIC-NEXT: and %s12, %s12, (32)0 +; GENDYNPIC-NEXT: lea.sl %s12, 
__tls_get_addr@plt_hi(%s10, %s12) +; GENDYNPIC-NEXT: bsic %s10, (, %s12) +; GENDYNPIC-NEXT: stl %s18, (,%s0) +; GENDYNPIC-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload +; GENDYNPIC-NEXT: or %s11, 0, %s9 +; GENDYNPIC-NEXT: ld %s16, 32(,%s11) +; GENDYNPIC-NEXT: ld %s15, 24(,%s11) +; GENDYNPIC-NEXT: ld %s10, 8(,%s11) +; GENDYNPIC-NEXT: ld %s9, (,%s11) +; GENDYNPIC-NEXT: b.l (,%lr) +; LOCAL-LABEL: set_local: +; LOCAL: .LBB{{[0-9]+}}_2: +; LOCAL-NEXT: lea %s34, y@tpoff_lo +; LOCAL-NEXT: and %s34, %s34, (32)0 +; LOCAL-NEXT: lea.sl %s34, y@tpoff_hi(%s34) +; LOCAL-NEXT: adds.l %s34, %s14, %s34 +; LOCAL-NEXT: stl %s0, (,%s34) +; LOCAL-NEXT: or %s11, 0, %s9 +entry: + store i32 %v, i32* @y, align 4 + ret void +}