Index: lib/Target/AArch64/AArch64.h
===================================================================
--- lib/Target/AArch64/AArch64.h
+++ lib/Target/AArch64/AArch64.h
@@ -46,6 +46,7 @@
 FunctionPass *createAArch64A53Fix835769();
 FunctionPass *createFalkorHWPFFixPass();
 FunctionPass *createFalkorMarkStridedAccessesPass();
+FunctionPass *createAArch64PagerandoOptimizerPass();
 
 FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
@@ -70,6 +71,7 @@
 void initializeAArch64StorePairSuppressPass(PassRegistry&);
 void initializeFalkorHWPFFixPass(PassRegistry&);
 void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
+void initializeAArch64PagerandoOptimizerPass(PassRegistry&);
 void initializeLDTLSCleanupPass(PassRegistry&);
 
 } // end namespace llvm
Index: lib/Target/AArch64/AArch64CallLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64CallLowering.cpp
+++ lib/Target/AArch64/AArch64CallLowering.cpp
@@ -340,6 +340,11 @@
   MachineRegisterInfo &MRI = MF.getRegInfo();
   auto &DL = F.getParent()->getDataLayout();
 
+  // Can't handle PIP (position-independent pages) yet.
+  const Function *CalleeF = Callee.isGlobal() ? dyn_cast<Function>(Callee.getGlobal()) : nullptr;
+  if (F.isPagerando() || (CalleeF && CalleeF->isPagerando()))
+    return false;
+
   SmallVector<ArgInfo, 8> SplitArgs;
   for (auto &OrigArg : OrigArgs) {
     splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv,
Index: lib/Target/AArch64/AArch64CollectLOH.cpp
===================================================================
--- lib/Target/AArch64/AArch64CollectLOH.cpp
+++ lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -187,7 +187,7 @@
     default:
       return false;
     case MachineOperand::MO_GlobalAddress:
-      return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT;
+      return (MI.getOperand(2).getTargetFlags() & AArch64II::MO_SOURCE) == AArch64II::MO_GOT;
     }
   }
 }
@@ -234,7 +234,7 @@
   case AArch64::LDRSui:
   case AArch64::LDRDui:
   case AArch64::LDRQui:
-    return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT);
+    return (MI.getOperand(2).getTargetFlags() & AArch64II::MO_SOURCE) != AArch64II::MO_GOT;
   }
 }
@@ -314,7 +314,7 @@
     Info.IsCandidate = true;
     Info.MI0 = &MI;
   } else if (MI.getOpcode() == AArch64::LDRXui &&
-             MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {
+             (MI.getOperand(2).getTargetFlags() & AArch64II::MO_SOURCE) == AArch64II::MO_GOT) {
     Info.Type = MCLOH_AdrpLdrGot;
     Info.IsCandidate = true;
     Info.MI0 = &MI;
@@ -359,7 +359,7 @@
     }
   } else {
     assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui");
-    assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&
+    assert(((MI.getOperand(2).getTargetFlags() & AArch64II::MO_SOURCE) == AArch64II::MO_GOT) &&
            "Expected GOT relocation");
     if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
       OpInfo.Type = MCLOH_AdrpLdrGotStr;
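
A note on the mechanical `MO_GOT` changes in AArch64CollectLOH.cpp above (and repeated throughout this patch): `MO_GOT` used to be a standalone flag bit, so `Flags & MO_GOT` was a valid test. With this patch, the "source" of a symbol reference (GOT, GOT-offset, POT, TLS, section) becomes a multi-valued field inside the flag byte (see the AArch64II changes in AArch64BaseInfo.h near the end of the patch), so the field must be masked out and compared for equality. A minimal sketch using the new encodings:

    // Values mirror the new AArch64II encoding introduced by this patch.
    enum : unsigned { MO_GOT = 0x10, MO_GOTOFF = 0x20, MO_POT = 0x30,
                      MO_SEC = 0x50, MO_SOURCE = 0x70 };

    unsigned Flags = MO_POT;                     // 0x30
    bool Wrong = (Flags & MO_GOT) != 0;          // true: 0x30 & 0x10 != 0
    bool Right = (Flags & MO_SOURCE) == MO_GOT;  // false, as intended

With the old bit-test, any MO_POT or MO_SEC operand would be misclassified as a GOT reference.
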
Index: lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
===================================================================
--- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -869,6 +869,92 @@
     return true;
   }
 
+  case AArch64::LOADgotr: {
+    unsigned DstReg = MI.getOperand(0).getReg();
+    const MachineOperand &Base = MI.getOperand(1);
+    const MachineOperand &Global = MI.getOperand(2);
+    unsigned Flags = Global.getTargetFlags();
+
+    if (Global.isGlobal()) {
+      MachineInstrBuilder MIB =
+          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui), DstReg)
+              .add(Base)
+              .addGlobalAddress(Global.getGlobal(), 0, Flags);
+      transferImpOps(MI, MIB, MIB);
+    } else if (Global.isSymbol()) {
+      MachineInstrBuilder MIB =
+          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui), DstReg)
+              .add(Base)
+              .addExternalSymbol(Global.getSymbolName(), Flags);
+      transferImpOps(MI, MIB, MIB);
+    } else {
+      assert(Global.isReg() &&
+             "Only expect global immediate or register offset");
+      MachineInstrBuilder MIB =
+          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXroX), DstReg)
+              .add(Base)
+              .add(Global)
+              .addImm(0)
+              .addImm(0);
+      transferImpOps(MI, MIB, MIB);
+    }
+    MI.eraseFromParent();
+    return true;
+  }
+
+  case AArch64::LOADpot: {
+    unsigned DstReg = MI.getOperand(0).getReg();
+    const MachineOperand &Base = MI.getOperand(1);
+    const MachineOperand &Offset = MI.getOperand(2);
+    unsigned Flags = Offset.getTargetFlags();
+
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui), DstReg)
+            .add(Base);
+
+    if (Offset.isGlobal()) {
+      MIB.addGlobalAddress(Offset.getGlobal(), 0, Flags | AArch64II::MO_POT);
+    } else if (Offset.isImm()) {
+      MIB.addImm(Offset.getImm());
+    } else {
+      assert(Offset.isCPI() && "Only expect globals, immediates, or constant pools");
+      MIB.addConstantPoolIndex(Offset.getIndex(), Offset.getOffset(),
+                               Flags | AArch64II::MO_POT);
+    }
+
+    transferImpOps(MI, MIB, MIB);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  case AArch64::MOVaddrBIN: {
+    unsigned DstReg = MI.getOperand(0).getReg();
+    const MachineOperand &Base = MI.getOperand(1);
+    const MachineOperand &Global = MI.getOperand(2);
+    unsigned Flags = Global.getTargetFlags();
+
+    // TODO(sjc): We need to add a page index to the bin address because we
+    // don't (yet) enforce that bins are <= 4096 bytes. If we can ensure that
+    // at least all destinations in a bin are on the first page, we can drop
+    // the first ADD below.
+    MachineInstrBuilder MIB1 =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri), DstReg)
+            .add(Base)
+            .addGlobalAddress(Global.getGlobal(), 0, Flags | AArch64II::MO_HI12)
+            .addImm(12);
+
+    MachineInstrBuilder MIB2 =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri), DstReg)
+            .addReg(DstReg)
+            .addGlobalAddress(Global.getGlobal(), 0, Flags | AArch64II::MO_PAGEOFF)
+            .addImm(0);
+
+    transferImpOps(MI, MIB1, MIB2);
+    MI.eraseFromParent();
+    return true;
+  }
+
   case AArch64::MOVaddr:
   case AArch64::MOVaddrJT:
   case AArch64::MOVaddrCP:
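
For orientation, here is a small C++ model of the address arithmetic the three new pseudo-expansions perform (function and parameter names are illustrative, not part of the patch):

    #include <cstdint>

    // LOADpot: dst = POT[Index]; expands to an LDRXui off the POT base.
    uint64_t LoadPOT(const uint64_t *POTBase, unsigned Index) {
      return POTBase[Index];
    }

    // LOADgotr: dst = *(GOTBase + GotOffset); LDRXui for an immediate offset,
    // LDRXroX for a register offset.
    uint64_t LoadGOTr(uint64_t GOTBase, uint64_t GotOffset) {
      return *reinterpret_cast<const uint64_t *>(GOTBase + GotOffset);
    }

    // MOVaddrBIN: dst = BinBase + SecOffset, split into an ADDXri of the high
    // 12 bits (shifted left by 12) and an ADDXri of the low 12 bits.
    uint64_t MovAddrBIN(uint64_t BinBase, uint64_t SecOffset) {
      return (BinBase + (SecOffset & ~0xfffULL)) + (SecOffset & 0xfffULL);
    }
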
Index: lib/Target/AArch64/AArch64FastISel.cpp
===================================================================
--- lib/Target/AArch64/AArch64FastISel.cpp
+++ lib/Target/AArch64/AArch64FastISel.cpp
@@ -458,6 +458,13 @@
   if (GV->isThreadLocal())
     return 0;
 
+  auto *F = dyn_cast<Function>(GV);
+  if (MF->getFunction().isPagerando() ||
+      (F && F->isPagerando())) {
+    // TODO(sjc): Implement PIP
+    return 0;
+  }
+
   // MachO still uses GOT for large code-model accesses, but ELF requires
   // movz/movk sequences, which FastISel doesn't handle yet.
   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
@@ -472,7 +479,7 @@
   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
 
   unsigned ResultReg;
-  if (OpFlags & AArch64II::MO_GOT) {
+  if ((OpFlags & AArch64II::MO_SOURCE) == AArch64II::MO_GOT) {
     // ADRP + LDRX
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
             ADRPReg)
@@ -3170,6 +3177,11 @@
   if (IsVarArg)
     return false;
 
+  // Can't handle PIP (position-independent pages) yet.
+  const Function *F = dyn_cast_or_null<Function>(Callee);
+  if (FuncInfo.MF->getFunction().isPagerando() || (F && F->isPagerando()))
+    return false;
+
   // FIXME: Only handle *simple* calls for now.
   MVT RetVT;
   if (CLI.RetTy->isVoidTy())
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -38,6 +38,9 @@
   ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
   LOADgot,  // Load from automatically generated descriptor (e.g. Global
             // Offset Table, TLS record).
+  LOADgotr, // Load from automatically generated descriptor (e.g. Global
+            // Offset Table, TLS record) via a register base address.
+  LOADpot,  // Load from the page offset table (POT).
   RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
   BRCOND,   // Conditional branch instruction; "b.cond".
   CSEL,
@@ -580,9 +583,13 @@
                         unsigned Flag) const;
   SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                         unsigned Flag) const;
+  SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
+                        unsigned Flag) const;
   template <class NodeTy>
   SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
   template <class NodeTy>
+  SDValue getPOT(NodeTy *N, SelectionDAG &DAG) const;
+  template <class NodeTy>
   SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
   template <class NodeTy>
   SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
@@ -601,6 +608,7 @@
   SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                          SDValue TVal, SDValue FVal, const SDLoc &dl,
                          SelectionDAG &DAG) const;
+  SDValue LowerPOT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
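
For readers new to pagerando: the POT is a per-module table holding the randomized base address of each page-sized bin, and by convention entry 0 holds the GOT address (getPOT and LowerPOT below rely on this). A rough sketch of the layout this backend assumes; the real table is created and populated by the dynamic loader, and the array size here is only illustrative:

    #include <cstdint>

    struct PageOffsetTable {
      uint64_t GOTBase;     // entry 0: address of the GOT
      uint64_t BinBase[15]; // entries 1..N: randomized base of each bin
    };
    // X20 (the POT base register chosen below) holds a PageOffsetTable *;
    // AArch64ISD::LOADpot is an indexed 64-bit load from this table.
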
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -130,6 +130,10 @@
   addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
   addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
 
+  // TODO: Decide if we want to stick with the platform register
+  if (TM.isPagerando())
+    setPOTBaseRegister(AArch64::X20);
+
   if (Subtarget->hasFPARMv8()) {
     addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
     addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
@@ -267,6 +271,8 @@
   // BlockAddress
   setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
 
+  setOperationAction(ISD::PAGE_OFFSET_TABLE, MVT::i64, Custom);
+
   // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
   setOperationAction(ISD::ADDC, MVT::i32, Custom);
   setOperationAction(ISD::ADDE, MVT::i32, Custom);
@@ -1088,6 +1094,8 @@
   case AArch64ISD::ADRP:     return "AArch64ISD::ADRP";
   case AArch64ISD::ADDlow:   return "AArch64ISD::ADDlow";
   case AArch64ISD::LOADgot:  return "AArch64ISD::LOADgot";
+  case AArch64ISD::LOADgotr: return "AArch64ISD::LOADgotr";
+  case AArch64ISD::LOADpot:  return "AArch64ISD::LOADpot";
   case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
   case AArch64ISD::BRCOND:   return "AArch64ISD::BRCOND";
   case AArch64ISD::CSEL:     return "AArch64ISD::CSEL";
@@ -2754,6 +2762,8 @@
     return LowerGlobalTLSAddress(Op, DAG);
   case ISD::SETCC:
     return LowerSETCC(Op, DAG);
+  case ISD::PAGE_OFFSET_TABLE:
+    return LowerPOT(Op, DAG);
   case ISD::BR_CC:
     return LowerBR_CC(Op, DAG);
   case ISD::SELECT:
@@ -3279,6 +3289,22 @@
     if (i->hasByValAttr())
       return false;
 
+  // Calls to pagerando functions from non-pagerando (legacy) functions must
+  // initialize the POT register, which is callee-saved. Thus we need to restore
+  // the original value of the POT register after the call and cannot tail call.
+  if (!CallerF.isPagerando()) {
+    if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+      auto *GV = G->getGlobal();
+      auto *F = dyn_cast<Function>(GV);
+      if (auto *GA = dyn_cast<GlobalAlias>(GV)) {
+        if (auto *Aliasee = dyn_cast<GlobalValue>(GA->getAliasee()))
+          F = dyn_cast<Function>(Aliasee);
+      }
+      if (F && F->isPagerando())
+        return false;
+    }
+  }
+
   if (getTargetMachine().Options.GuaranteedTailCallOpt)
     return canGuaranteeTCO(CalleeCC) && CCMatch;
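
A minimal C++ model of why that tail-call bail-out is needed (the POT register is modeled as a global; all names here are hypothetical):

    #include <cstdint>

    extern uint64_t *POT;                // stands in for register X20
    extern uint64_t *LookupPOTAddress(); // e.g. a GOT load of _PAGE_OFFSET_TABLE_
    extern void BinnedCallee();

    void LegacyCaller() {
      uint64_t *Saved = POT;    // X20 is callee-saved: preserve our caller's value
      POT = LookupPOTAddress(); // initialize the POT base for the callee
      BinnedCallee();           // control must come back here so that...
      POT = Saved;              // ...X20 can be restored; a tail call would leave
                                // no instruction after the branch to do this.
    }
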
@@ -3661,9 +3687,19 @@
   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   // node so that legalize doesn't hack it.
   if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-    auto GV = G->getGlobal();
-    if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
-        AArch64II::MO_GOT) {
+    auto *GV = G->getGlobal();
+    auto *F = dyn_cast<Function>(GV);
+    if (auto *GA = dyn_cast<GlobalAlias>(GV)) {
+      if (auto *Aliasee = dyn_cast<GlobalValue>(GA->getAliasee()))
+        F = dyn_cast<Function>(Aliasee);
+    }
+    bool UsePIPAddressing = MF.getFunction().isPagerando() ||
+                            (F && F->isPagerando());
+    unsigned char OpFlags =
+        Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
+    if (UsePIPAddressing) {
+      Callee = getPOT(G, DAG);
+    } else if ((OpFlags & AArch64II::MO_SOURCE) == AArch64II::MO_GOT) {
       Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
       Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
     } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) {
@@ -3675,7 +3711,9 @@
       Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
     }
   } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+    if (MF.getFunction().isPagerando()) {
+      Callee = getPOT(S, DAG);
+    } else if (getTargetMachine().getCodeModel() == CodeModel::Large &&
         Subtarget->isTargetMachO()) {
       const char *Sym = S->getSymbol();
       Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
@@ -3869,6 +3907,12 @@
   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
 }
 
+SDValue AArch64TargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty,
+                                             SelectionDAG &DAG,
+                                             unsigned Flag) const {
+  return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag);
+}
+
 // (loadGOT sym)
 template <class NodeTy>
 SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
@@ -3882,6 +3926,100 @@
   return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
 }
 
+// Position-independent pages: access through the POT.
+template <class NodeTy>
+SDValue AArch64TargetLowering::getPOT(NodeTy *N, SelectionDAG &DAG) const {
+  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getPOT\n");
+
+  // Pagerando targets DSOs specifically, and the large code model can only be
+  // used for statically linked binaries. Thus, we do not support a large code
+  // model.
+  assert(getTargetMachine().getCodeModel() != CodeModel::Large);
+
+  SDLoc DL(N);
+  auto PtrVT = getPointerTy(DAG.getDataLayout());
+  const Function *F = nullptr;
+  unsigned char OpFlags = 0;
+  if (auto *GN = dyn_cast<GlobalAddressSDNode>(N)) {
+    const GlobalValue *GV = GN->getGlobal();
+    F = dyn_cast<Function>(GV);
+    OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
+    if (auto *GA = dyn_cast<GlobalAlias>(GV)) {
+      if (auto *Aliasee = dyn_cast<GlobalValue>(GA->getAliasee()))
+        F = dyn_cast<Function>(Aliasee);
+    }
+  } else if (isa<ExternalSymbolSDNode>(N)) {
+    // Calls from PIP functions to external symbols should go through the GOT
+    // for now, so that we can properly indirect through the POT.
+    OpFlags = AArch64II::MO_GOT;
+  }
+  bool pagerandoBinTarget = F && F->isPagerando();
+
+  SDValue POTValue = DAG.getNode(ISD::PAGE_OFFSET_TABLE, DL,
+                                 DAG.getVTList(MVT::i64, MVT::Other));
+  SDValue Chain = POTValue.getValue(1);
+
+  if ((OpFlags & AArch64II::MO_SOURCE) == AArch64II::MO_GOT) {
+    // Load the GOT address from the POT.
+    SDValue GOTAddr = DAG.getNode(AArch64ISD::LOADpot, DL, PtrVT, Chain, POTValue,
+                                  DAG.getTargetConstant(0, DL, MVT::i32));
+
+    const Module *M = DAG.getMachineFunction().getFunction().getParent();
+    PICLevel::Level picLevel = M->getPICLevel();
+
+    SDValue Offset;
+    const unsigned char MO_NC = AArch64II::MO_NC;
+    const unsigned char MO_GOTOFF = AArch64II::MO_GOTOFF;
+    if (picLevel == PICLevel::SmallPIC) {
+      // GOT size <= 28KiB
+      Offset = getTargetNode(N, PtrVT, DAG, MO_GOTOFF);
+    } else {
+      // Large GOT size
+      Offset = DAG.getNode(
+          AArch64ISD::WrapperLarge, DL, PtrVT,
+          getTargetNode(N, PtrVT, DAG, MO_GOTOFF | AArch64II::MO_G3),
+          getTargetNode(N, PtrVT, DAG, MO_GOTOFF | AArch64II::MO_G2 | MO_NC),
+          getTargetNode(N, PtrVT, DAG, MO_GOTOFF | AArch64II::MO_G1 | MO_NC),
+          getTargetNode(N, PtrVT, DAG, MO_GOTOFF | AArch64II::MO_G0 | MO_NC));
+    }
+
+    return DAG.getNode(AArch64ISD::LOADgotr, DL, PtrVT, GOTAddr, Offset);
+  } else if (pagerandoBinTarget) {
+    // We may have an alias, so we need to use the real target function for
+    // the POT offset.
+    SDValue POTOffset =
+        DAG.getTargetGlobalAddress(F, DL, PtrVT, 0, AArch64II::MO_POT);
+    SDValue BaseAddr = DAG.getNode(AArch64ISD::LOADpot, DL, PtrVT, Chain,
+                                   POTValue, POTOffset);
+
+    SDValue Offset =
+        DAG.getTargetGlobalAddress(F, DL, PtrVT, 0, AArch64II::MO_SEC);
+
+    return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, Offset);
+  } else {
+    // Load the GOT address from the POT.
+    SDValue GOTAddr = DAG.getNode(AArch64ISD::LOADpot, DL, PtrVT, Chain, POTValue,
+                                  DAG.getTargetConstant(0, DL, MVT::i32));
+
+    SDValue Hi = getTargetNode(N, PtrVT, DAG, AArch64II::MO_PAGE);
+    SDValue Lo = getTargetNode(N, PtrVT, DAG,
+                               AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+    SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
+    SDValue TargetPCRel = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+
+    Hi = DAG.getTargetExternalSymbol("_GLOBAL_OFFSET_TABLE_", PtrVT,
+                                     AArch64II::MO_PAGE);
+    Lo = DAG.getTargetExternalSymbol("_GLOBAL_OFFSET_TABLE_", PtrVT,
+                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+    ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
+    SDValue GOTPCRel = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+
+    SDValue Offset = DAG.getNode(ISD::SUB, DL, PtrVT, TargetPCRel, GOTPCRel);
+
+    return DAG.getNode(ISD::ADD, DL, PtrVT, GOTAddr, Offset);
+  }
+}
+
 // (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
 template <class NodeTy>
 SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
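
So getPOT emits one of three addressing sequences. A compact C++ model of the computations (POT entry 0 holds the GOT address, as above; names illustrative):

    #include <cstdint>

    // 1. GOT-sourced symbols: LOADpot index 0, then LOADgotr at GOT + gotoff.
    uint64_t ViaGOT(const uint64_t *POT, uint64_t GotOff) {
      return *reinterpret_cast<const uint64_t *>(POT[0] + GotOff);
    }

    // 2. Binned functions: LOADpot at the bin's index, then MOVaddrBIN adds
    //    the callee's section-relative offset.
    uint64_t ViaBin(const uint64_t *POT, unsigned BinIndex, uint64_t SecOff) {
      return POT[BinIndex] + SecOff;
    }

    // 3. Other local symbols: GOT base plus the PC-relative difference between
    //    the symbol and _GLOBAL_OFFSET_TABLE_ (two ADRP/ADDlow pairs, one SUB).
    uint64_t ViaGOTRelative(const uint64_t *POT, uint64_t SymPC, uint64_t GotPC) {
      return POT[0] + (SymPC - GotPC);
    }
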
@@ -3926,8 +4064,20 @@
   assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
          "unexpected offset in global node");
 
+  MachineFunction &MF = DAG.getMachineFunction();
+  auto *F = dyn_cast<Function>(GV);
+  if (auto *GA = dyn_cast<GlobalAlias>(GV)) {
+    if (auto *Aliasee = dyn_cast<GlobalValue>(GA->getAliasee()))
+      F = dyn_cast<Function>(Aliasee);
+  }
+  bool pagerandoBinTarget = F && F->isPagerando();
+  if (MF.getFunction().isPagerando() ||
+      pagerandoBinTarget) {
+    return getPOT(GN, DAG);
+  }
+
   // This also catches the large code model case for Darwin.
-  if ((OpFlags & AArch64II::MO_GOT) != 0) {
+  if ((OpFlags & AArch64II::MO_SOURCE) == AArch64II::MO_GOT) {
     return getGOT(GN, DAG, TargetFlags);
   }
@@ -4533,6 +4683,26 @@
   }
 }
 
+SDValue AArch64TargetLowering::LowerPOT(SDValue Op, SelectionDAG &DAG) const {
+  assert(getTargetMachine().isPagerando() &&
+         "POT lowering is only supported with the PIP relocation model");
+
+  SDLoc dl(Op);
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  MachineFunction &MF = DAG.getMachineFunction();
+  unsigned POTReg = getPOTBaseRegister();
+  if (MF.getFunction().isPagerando()) {
+    return DAG.getCopyFromReg(DAG.getEntryNode(), dl, POTReg, PtrVT);
+  } else {
+    SDValue POTAddress = DAG.getTargetExternalSymbol("_PAGE_OFFSET_TABLE_", PtrVT,
+                                                     AArch64II::MO_GOT);
+    POTAddress = DAG.getNode(AArch64ISD::LOADgot, dl, PtrVT, POTAddress);
+    SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, POTReg, POTAddress);
+    SDValue Ops[2] = { POTAddress, Chain };
+    return DAG.getMergeValues(Ops, dl);
+  }
+}
+
 SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
                                               SDValue RHS, SDValue TVal,
                                               SDValue FVal, const SDLoc &dl,
@@ -4787,6 +4957,10 @@
                                                SelectionDAG &DAG) const {
   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 
+  if (DAG.getMachineFunction().getFunction().isPagerando()) {
+    return getPOT(CP, DAG);
+  }
+
   if (getTargetMachine().getCodeModel() == CodeModel::Large) {
     // Use the GOT for the large code model on iOS.
     if (Subtarget->isTargetMachO()) {
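
LowerPOT above gives the POT base two behaviors: inside a pagerando function X20 is already live-in and is simply copied, while a legacy function materializes it on demand by loading _PAGE_OFFSET_TABLE_ through the GOT and seeding X20 for its callees. A minimal model of that decision (hypothetical helper names):

    #include <cstdint>

    extern uint64_t *LoadViaGOT(const char *Symbol); // models AArch64ISD::LOADgot
    static uint64_t *X20;                            // models the POT register

    uint64_t *GetPOTBase(bool InPagerandoFunction) {
      if (InPagerandoFunction)
        return X20;                            // CopyFromReg: the caller set it up
      X20 = LoadViaGOT("_PAGE_OFFSET_TABLE_"); // LOADgot, then CopyToReg into X20
      return X20;
    }
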
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1605,7 +1605,7 @@
   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
   const unsigned char MO_NC = AArch64II::MO_NC;
 
-  if ((OpFlags & AArch64II::MO_GOT) != 0) {
+  if ((OpFlags & AArch64II::MO_SOURCE) == AArch64II::MO_GOT) {
     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
         .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -168,6 +168,9 @@
 def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
 def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
 def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
+def AArch64LOADgotr      : SDNode<"AArch64ISD::LOADgotr", SDTIntBinOp>;
+def AArch64LOADpot       : SDNode<"AArch64ISD::LOADpot", SDTIntBinOp,
+                                  [SDNPHasChain]>;
 def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                   SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                    SDTCisVT<1, i32> ]>,
@@ -366,6 +369,15 @@
               [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
           Sched<[WriteLDAdr]>;
 
+let AddedComplexity = 10 in
+def LOADgotr : Pseudo<(outs GPR64:$dst), (ins GPR64:$base, i64imm:$addr),
+                      [(set GPR64:$dst, (AArch64LOADgotr GPR64:$base, tglobaladdr:$addr))]>,
+               Sched<[WriteLD]>;
+
+def LOADpot : Pseudo<(outs GPR64:$dst), (ins GPR64:$base, i64imm:$addr),
+                     [(set GPR64:$dst, (AArch64LOADpot GPR64:$base, tglobaladdr:$addr))]>,
+              Sched<[WriteLD]>;
+
 // The MOVaddr instruction should match only when the add is not folded
 // into a load or store address.
 def MOVaddr
@@ -398,6 +410,10 @@
               [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                                texternalsym:$low))]>,
       Sched<[WriteAdrAdr]>;
+def MOVaddrBIN
+    : Pseudo<(outs GPR64:$dst), (ins GPR64:$base, i64imm:$low),
+             [(set GPR64:$dst, (add GPR64:$base, tglobaladdr:$low))]>,
+      Sched<[WriteAdr]>;
 
 // Normally AArch64addlow either gets folded into a following ldr/str,
 // or together with an adrp into MOVaddr above. For cases with TLS, it
 // might appear without either of them, so allow lowering it into a plain
@@ -419,6 +435,15 @@
 def : Pat<(AArch64LOADgot tconstpool:$addr),
           (LOADgot tconstpool:$addr)>;
 
+def : Pat<(AArch64LOADgotr GPR64:$base, GPR64:$offset),
+          (LOADgotr GPR64:$base, GPR64:$offset)>;
+
+def : Pat<(AArch64LOADpot GPR64:$base, timm:$index),
+          (LOADpot GPR64:$base, imm:$index)>;
+
+def : Pat<(AArch64LOADpot GPR64:$base, tconstpool:$index),
+          (LOADpot GPR64:$base, tconstpool:$index)>;
+
 //===----------------------------------------------------------------------===//
 // System instructions.
 //===----------------------------------------------------------------------===//
@@ -760,6 +785,13 @@
                                             tjumptable:$g2, 32),
                                     tjumptable:$g3, 48)>;
 
+def : Pat<(AArch64WrapperLarge texternalsym:$g3, texternalsym:$g2,
+                               texternalsym:$g1, texternalsym:$g0),
+          (MOVKXi (MOVKXi (MOVKXi (MOVZXi texternalsym:$g0, 0),
+                                  texternalsym:$g1, 16),
+                          texternalsym:$g2, 32),
+                  texternalsym:$g3, 48)>;
+
 //===----------------------------------------------------------------------===//
 // Arithmetic instructions.
Index: lib/Target/AArch64/AArch64InstructionSelector.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -930,8 +930,14 @@
       // FIXME: we don't support TLS yet.
       return false;
     }
+    auto *F = dyn_cast<Function>(GV);
+    if (MF.getFunction().isPagerando() ||
+        (F && F->isPagerando())) {
+      // TODO(sjc): Implement PIP
+      return false;
+    }
     unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
-    if (OpFlags & AArch64II::MO_GOT) {
+    if ((OpFlags & AArch64II::MO_SOURCE) == AArch64II::MO_GOT) {
      I.setDesc(TII.get(AArch64::LOADgot));
      I.getOperand(1).setTargetFlags(OpFlags);
    } else if (TM.getCodeModel() == CodeModel::Large) {
Index: lib/Target/AArch64/AArch64MCInstLower.cpp
===================================================================
--- lib/Target/AArch64/AArch64MCInstLower.cpp
+++ lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -66,7 +66,7 @@
   // FIXME: We would like an efficient form for this, so we don't have to do a
   // lot of extra uniquing.
   MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
-  if ((MO.getTargetFlags() & AArch64II::MO_GOT) != 0) {
+  if ((MO.getTargetFlags() & AArch64II::MO_SOURCE) == AArch64II::MO_GOT) {
     if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
       RefKind = MCSymbolRefExpr::VK_GOTPAGE;
     else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
@@ -74,7 +74,7 @@
       RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF;
     else
       llvm_unreachable("Unexpected target flags with MO_GOT on GV operand");
-  } else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) {
+  } else if ((MO.getTargetFlags() & AArch64II::MO_SOURCE) == AArch64II::MO_TLS) {
     if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
       RefKind = MCSymbolRefExpr::VK_TLVPPAGE;
     else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
@@ -99,10 +99,14 @@
 MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
                                                     MCSymbol *Sym) const {
   uint32_t RefFlags = 0;
+  unsigned SourceFlag = MO.getTargetFlags() & AArch64II::MO_SOURCE;
+  unsigned FragmentFlag = MO.getTargetFlags() & AArch64II::MO_FRAGMENT;
 
-  if (MO.getTargetFlags() & AArch64II::MO_GOT)
+  if (SourceFlag == AArch64II::MO_GOT)
     RefFlags |= AArch64MCExpr::VK_GOT;
-  else if (MO.getTargetFlags() & AArch64II::MO_TLS) {
+  else if (SourceFlag == AArch64II::MO_GOTOFF)
+    RefFlags |= AArch64MCExpr::VK_GOTOFF;
+  else if (SourceFlag == AArch64II::MO_TLS) {
     TLSModel::Model Model;
     if (MO.isGlobal()) {
       const GlobalValue *GV = MO.getGlobal();
@@ -133,26 +137,28 @@
       RefFlags |= AArch64MCExpr::VK_TLSDESC;
       break;
     }
+  } else if (SourceFlag == AArch64II::MO_SEC) {
+    RefFlags |= AArch64MCExpr::VK_SEC;
   } else {
     // No modifier means this is a generic reference, classified as absolute
     // for the cases where it matters (:abs_g0: etc).
     RefFlags |= AArch64MCExpr::VK_ABS;
   }
 
-  if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
+  if (FragmentFlag == AArch64II::MO_PAGE)
     RefFlags |= AArch64MCExpr::VK_PAGE;
-  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
-           AArch64II::MO_PAGEOFF)
+  else if (FragmentFlag == AArch64II::MO_PAGEOFF)
     RefFlags |= AArch64MCExpr::VK_PAGEOFF;
-  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3)
+  else if (FragmentFlag == AArch64II::MO_G3)
     RefFlags |= AArch64MCExpr::VK_G3;
-  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G2)
+  else if (FragmentFlag == AArch64II::MO_G2)
     RefFlags |= AArch64MCExpr::VK_G2;
-  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G1)
+  else if (FragmentFlag == AArch64II::MO_G1)
     RefFlags |= AArch64MCExpr::VK_G1;
-  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
+  else if (FragmentFlag == AArch64II::MO_G0)
     RefFlags |= AArch64MCExpr::VK_G0;
-  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12)
+  else if (FragmentFlag == AArch64II::MO_HI12)
     RefFlags |= AArch64MCExpr::VK_HI12;
 
   if (MO.getTargetFlags() & AArch64II::MO_NC)
@@ -164,6 +170,37 @@
     Expr = MCBinaryExpr::createAdd(
         Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
 
+  if (SourceFlag == AArch64II::MO_POT) {
+    unsigned index;
+    if (MO.isGlobal()) {
+      auto *GO = cast<GlobalObject>(MO.getGlobal());
+      index = Printer.GetPOTIndex(GO);
+    } else {
+      assert(MO.isCPI() && "Can only handle globals or constant pool indices");
+      index = Printer.GetPOTIndex(MO.getIndex());
+    }
+    return MCOperand::createImm(index);
+  } else if (SourceFlag == AArch64II::MO_SEC) {
+    const MCSymbol *SecSym;
+    if (MO.isGlobal()) {
+      auto *GO = cast<GlobalObject>(MO.getGlobal());
+      SecSym = Printer.GetSectionSymbol(GO);
+    } else {
+      assert(MO.isCPI() && "Can only handle globals or constant pool indices");
+      SecSym = Printer.GetSectionSymbol(MO.getIndex());
+    }
+    assert(SecSym && "Could not find a section symbol");
+    const MCExpr *SecExpr = MCSymbolRefExpr::create(SecSym, Ctx);
+    Expr = MCBinaryExpr::createSub(Expr, SecExpr, Ctx);
+    if (FragmentFlag == AArch64II::MO_PAGEOFF) {
+      const MCExpr *MaskExpr = MCConstantExpr::create(0xfff, Ctx);
+      Expr = MCBinaryExpr::createAnd(Expr, MaskExpr, Ctx);
+    } else if (FragmentFlag == AArch64II::MO_HI12) {
+      const MCExpr *ShiftExpr = MCConstantExpr::create(12, Ctx);
+      Expr = MCBinaryExpr::createLShr(Expr, ShiftExpr, Ctx);
+    }
+  }
+
   AArch64MCExpr::VariantKind RefKind;
   RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags);
   Expr = AArch64MCExpr::create(Expr, RefKind, Ctx);
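
The MO_SEC path above rewrites the operand into a section-relative expression before the variant kind is attached: a :sec_lo12: reference becomes `(sym + offset - section) & 0xfff`, and :sec_hi12: is the same difference shifted right by 12 — matching the two ADDXri instructions that MOVaddrBIN expands into. A worked example with made-up addresses:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Sym = 0x40001234, Section = 0x40000000; // symbol 0x1234 into its bin
      uint64_t Diff = Sym - Section;                   // what the Sub expression yields
      assert((Diff & 0xfff) == 0x234); // :sec_lo12: operand of the second ADD
      assert((Diff >> 12) == 0x1);     // :sec_hi12: operand of the first ADD (lsl #12)
      return 0;
    }
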
Index: lib/Target/AArch64/AArch64PagerandoOptimizer.cpp
===================================================================
--- /dev/null
+++ lib/Target/AArch64/AArch64PagerandoOptimizer.cpp
@@ -0,0 +1,137 @@
+//===-- AArch64PagerandoOptimizer.cpp - Optimizes intra-bin function calls ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes calls inside the same position-independent bin into
+// direct calls to avoid the overhead of indirect calls through the POT.
+// The implementation relies on SSA form to follow def-use chains; therefore,
+// this pass must be scheduled before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pagerando"
+
+namespace {
+class AArch64PagerandoOptimizer : public MachineFunctionPass {
+public:
+  static char ID;
+  explicit AArch64PagerandoOptimizer() : MachineFunctionPass(ID) {
+    initializeAArch64PagerandoOptimizerPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::TracksLiveness);
+  }
+
+private:
+  void optimizeCalls(MachineInstr *MI);
+  void replaceWithDirectCall(MachineInstr *MI, const Function *Callee);
+};
+} // end anonymous namespace
+
+char AArch64PagerandoOptimizer::ID = 0;
+INITIALIZE_PASS(AArch64PagerandoOptimizer, "pagerando-optimizer-aarch64",
+                "Pagerando intra-bin optimizer for AArch64", false, false)
+
+FunctionPass *llvm::createAArch64PagerandoOptimizerPass() {
+  return new AArch64PagerandoOptimizer();
+}
+
+static const Function *getCallee(const MachineInstr &MI) {
+  assert(MI.getOpcode() == AArch64::MOVaddrBIN);
+  return cast<Function>(MI.getOperand(2).getGlobal());
+}
+
+static bool isIntraBin(const MachineInstr &MI, StringRef BinPrefix) {
+  return MI.getOpcode() == AArch64::MOVaddrBIN &&
+         getCallee(MI)->getSectionPrefix() == BinPrefix;
+}
+
+bool AArch64PagerandoOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  auto &F = MF.getFunction();
+  // This pass is an (optional) optimization, so honor skipFunction.
+  if (!F.isPagerando() || skipFunction(F)) {
+    return false;
+  }
+
+  // The section prefix is assigned by the PagerandoBinning pass.
+  auto BinPrefix = F.getSectionPrefix().getValue();
+
+  // Collect intra-bin references.
+  std::vector<MachineInstr *> Worklist;
+  for (auto &BB : MF) {
+    for (auto &MI : BB) {
+      if (isIntraBin(MI, BinPrefix))
+        Worklist.push_back(&MI);
+    }
+  }
+
+  // Optimize intra-bin calls.
+  for (auto *MI : Worklist)
+    optimizeCalls(MI);
+
+  return !Worklist.empty();
+}
+
+void AArch64PagerandoOptimizer::optimizeCalls(MachineInstr *MI) {
+  auto &MRI = MI->getParent()->getParent()->getRegInfo();
+
+  SmallVector<MachineInstr *, 4> Calls;
+  for (auto &Op : MI->defs()) {
+    for (auto &User : MRI.use_instructions(Op.getReg()))
+      Calls.push_back(&User);
+  }
+
+  auto *Callee = getCallee(*MI);
+  for (auto *Call : Calls)
+    replaceWithDirectCall(Call, Callee);
+
+  MI->eraseFromParent();
+  // Note: this might be the only use of the preceding AArch64::LOADpot pseudo
+  // instruction. We schedule the DeadMachineInstructionElim pass after this
+  // pass to get rid of it.
+}
+
+static unsigned toDirectCall(unsigned Opc) {
+  switch (Opc) {
+  case AArch64::BLR:        return AArch64::BL;
+  case AArch64::TCRETURNri: return AArch64::TCRETURNdi;
+  default:
+    llvm_unreachable("Unhandled AArch64 call opcode");
+  }
+}
+
+void AArch64PagerandoOptimizer::replaceWithDirectCall(MachineInstr *MI,
+                                                      const Function *Callee) {
+  auto &MBB = *MI->getParent();
+  auto &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
+
+  auto Opc = toDirectCall(MI->getOpcode());
+  auto MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TII.get(Opc))
+                 .addGlobalAddress(Callee);
+
+  // Copy over the remaining operands.
+  auto RemainingOps = make_range(MI->operands_begin() + 1, MI->operands_end());
+  for (auto &Op : RemainingOps)
+    MIB.add(Op);
+
+  MI->eraseFromParent();
+}
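
Concretely, when caller and callee share a bin, the pass rewrites the indirect sequence (schematic MIR, register numbers invented):

    %bin:gpr64  = LOADpot %pot:gpr64, target-flags(...) @callee
    %addr:gpr64 = MOVaddrBIN killed %bin:gpr64, target-flags(...) @callee
    BLR killed %addr:gpr64, ...

into a single direct call:

    BL @callee, ...

(and likewise TCRETURNri into TCRETURNdi), leaving the now-dead LOADpot for the DeadMachineInstructionElim run scheduled right after this pass in AArch64TargetMachine.cpp below.
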
Index: lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -156,6 +156,9 @@
   if (hasBasePointer(MF))
     markSuperRegs(Reserved, AArch64::W19);
 
+  if (MF.getTarget().isPagerando())
+    markSuperRegs(Reserved, AArch64::W20); // POT register
+
   assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
 }
@@ -180,7 +183,7 @@
     return hasBasePointer(MF);
   case AArch64::X20:
   case AArch64::W20:
-    return MF.getSubtarget<AArch64Subtarget>().isX20Reserved();
+    return MF.getSubtarget<AArch64Subtarget>().isX20Reserved() || MF.getTarget().isPagerando();
   case AArch64::FP:
   case AArch64::W29:
     return TFI->hasFP(MF) || TT.isOSDarwin();
@@ -453,7 +456,9 @@
            - MF.getSubtarget<AArch64Subtarget>()
                  .isX18Reserved() // X18 reserved as platform register
            - MF.getSubtarget<AArch64Subtarget>()
                  .isX20Reserved() // X20 reserved as platform register
-           - hasBasePointer(MF); // X19
+           - hasBasePointer(MF) // X19
+           - MF.getTarget()
+                 .isPagerando(); // X20 reserved as POT register
   case AArch64::FPR8RegClassID:
   case AArch64::FPR16RegClassID:
   case AArch64::FPR32RegClassID:
Index: lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetMachine.cpp
+++ lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -163,6 +163,7 @@
   initializeAArch64StorePairSuppressPass(*PR);
   initializeFalkorHWPFFixPass(*PR);
   initializeFalkorMarkStridedAccessesLegacyPass(*PR);
+  initializeAArch64PagerandoOptimizerPass(*PR);
   initializeLDTLSCleanupPass(*PR);
 }
 
@@ -483,6 +484,11 @@
 void AArch64PassConfig::addPreRegAlloc() {
+  if (TM->getOptLevel() != CodeGenOpt::None && TM->isPagerando()) {
+    addPass(createAArch64PagerandoOptimizerPass());
+    addPass(&DeadMachineInstructionElimID);
+  }
+
   // Change dead register definitions to refer to the zero register.
   if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
     addPass(createAArch64DeadRegisterDefinitions());
Index: lib/Target/AArch64/CMakeLists.txt
===================================================================
--- lib/Target/AArch64/CMakeLists.txt
+++ lib/Target/AArch64/CMakeLists.txt
@@ -43,6 +43,7 @@
   AArch64LoadStoreOptimizer.cpp
   AArch64MacroFusion.cpp
   AArch64MCInstLower.cpp
+  AArch64PagerandoOptimizer.cpp
   AArch64PromoteConstant.cpp
   AArch64PBQPRegAlloc.cpp
   AArch64RegisterBankInfo.cpp
Index: lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
===================================================================
--- lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -10,6 +10,7 @@
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
 #include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmBackend.h"
@@ -109,7 +110,6 @@
   case FK_Data_1:
     return 1;
 
-  case AArch64::fixup_aarch64_movw:
   case FK_Data_2:
   case FK_SecRel_2:
     return 2;
@@ -123,6 +123,7 @@
   case AArch64::fixup_aarch64_ldst_imm12_scale16:
   case AArch64::fixup_aarch64_ldr_pcrel_imm19:
   case AArch64::fixup_aarch64_pcrel_branch19:
+  case AArch64::fixup_aarch64_movw:
     return 3;
 
   case AArch64::fixup_aarch64_pcrel_adr_imm21:
@@ -214,10 +215,33 @@
     if (Value & 0xf)
       Ctx.reportError(Fixup.getLoc(), "fixup must be 16-byte aligned");
     return Value >> 4;
-  case AArch64::fixup_aarch64_movw:
+  case AArch64::fixup_aarch64_movw: {
+    const auto *A64E = cast<AArch64MCExpr>(Fixup.getValue());
+    AArch64MCExpr::VariantKind AddressFrag = AArch64MCExpr::getAddressFrag(A64E->getKind());
+    if (!AArch64MCExpr::isNotChecked(A64E->getKind())) {
+      if (AddressFrag == AArch64MCExpr::VK_G0 &&
+          Value >= 0x10000ULL)
+        Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+      else if (AddressFrag == AArch64MCExpr::VK_G1 &&
+               Value >= 0x100000000ULL)
+        Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+      else if (AddressFrag == AArch64MCExpr::VK_G2 &&
+               Value >= 0x1000000000000ULL)
+        Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+    }
+    if (AddressFrag == AArch64MCExpr::VK_G0)
+      return Value & 0xffff;
+    else if (AddressFrag == AArch64MCExpr::VK_G1)
+      return (Value >> 16) & 0xffff;
+    else if (AddressFrag == AArch64MCExpr::VK_G2)
+      return (Value >> 32) & 0xffff;
+    else if (AddressFrag == AArch64MCExpr::VK_G3)
+      return (Value >> 48) & 0xffff;
+
     Ctx.reportError(Fixup.getLoc(),
-                    "no resolvable MOVZ/MOVK fixups supported yet");
+                    "MOVZ/MOVK fixup not recognized");
     return Value;
+  }
   case AArch64::fixup_aarch64_pcrel_branch14:
     // Signed 16-bit immediate
     if (SignedValue > 32767 || SignedValue < -32768)
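
The rewritten movw handler above resolves :gotoff_gN: fixups by slicing the 64-bit GOT offset into 16-bit chunks for a MOVZ/MOVK sequence, with range checks on the non-_NC variants. A worked example (the value is hypothetical):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Value = 0x0001234500006789ULL;     // a resolved GOT offset
      assert(((Value >>  0) & 0xffff) == 0x6789); // :gotoff_g0_nc:
      assert(((Value >> 16) & 0xffff) == 0x0000); // :gotoff_g1_nc:
      assert(((Value >> 32) & 0xffff) == 0x2345); // :gotoff_g2_nc:
      assert(((Value >> 48) & 0xffff) == 0x0001); // :gotoff_g3:
      // A checked :gotoff_g0: fixup would be rejected for this value, since
      // it does not fit in 16 bits (Value >= 0x10000).
      return 0;
    }
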
Index: lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
===================================================================
--- lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -97,6 +97,9 @@
   case AArch64MCExpr::VK_GOTTPREL_G0_NC:
     Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC));
     return true;
+  case AArch64MCExpr::VK_GOTOFF:
+    Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(LD64_GOTOFF_LO15));
+    return true;
   default:
     return false;
   }
@@ -321,6 +324,8 @@
         return ELF::R_AARCH64_NONE;
       }
     }
+    if (SymLoc == AArch64MCExpr::VK_GOTOFF && !IsNC)
+      return ELF::R_AARCH64_LD64_GOTOFF_LO15;
    if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
      return R_CLS(TLSLD_LDST64_DTPREL_LO12);
    if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
@@ -413,6 +418,20 @@
       return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
     if (RefKind == AArch64MCExpr::VK_GOTTPREL_G0_NC)
       return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
+    if (RefKind == AArch64MCExpr::VK_GOTOFF_G3)
+      return ELF::R_AARCH64_MOVW_GOTOFF_G3;
+    if (RefKind == AArch64MCExpr::VK_GOTOFF_G2_NC)
+      return ELF::R_AARCH64_MOVW_GOTOFF_G2_NC;
+    if (RefKind == AArch64MCExpr::VK_GOTOFF_G2)
+      return ELF::R_AARCH64_MOVW_GOTOFF_G2;
+    if (RefKind == AArch64MCExpr::VK_GOTOFF_G1_NC)
+      return ELF::R_AARCH64_MOVW_GOTOFF_G1_NC;
+    if (RefKind == AArch64MCExpr::VK_GOTOFF_G1)
+      return ELF::R_AARCH64_MOVW_GOTOFF_G1;
+    if (RefKind == AArch64MCExpr::VK_GOTOFF_G0_NC)
+      return ELF::R_AARCH64_MOVW_GOTOFF_G0_NC;
+    if (RefKind == AArch64MCExpr::VK_GOTOFF_G0)
+      return ELF::R_AARCH64_MOVW_GOTOFF_G0;
 
     Ctx.reportError(Fixup.getLoc(),
                     "invalid fixup for movz/movk instruction");
     return ELF::R_AARCH64_NONE;
Index: lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
===================================================================
--- lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -36,6 +36,9 @@
     VK_TPREL      = 0x006,
     VK_TLSDESC    = 0x007,
     VK_SECREL     = 0x008,
+    VK_GOTOFF     = 0x009,
+    VK_POT        = 0x00a,
+    VK_SEC        = 0x00b,
     VK_SymLocBits = 0x00f,
 
     // Variants specifying which part of the final address calculation is
@@ -101,6 +104,23 @@
     VK_TLSDESC_PAGE  = VK_TLSDESC | VK_PAGE,
     VK_SECREL_LO12   = VK_SECREL | VK_PAGEOFF,
     VK_SECREL_HI12   = VK_SECREL | VK_HI12,
+    VK_GOTOFF_G3     = VK_GOTOFF | VK_G3,
+    VK_GOTOFF_G2     = VK_GOTOFF | VK_G2,
+    VK_GOTOFF_G2_NC  = VK_GOTOFF | VK_G2 | VK_NC,
+    VK_GOTOFF_G1     = VK_GOTOFF | VK_G1,
+    VK_GOTOFF_G1_NC  = VK_GOTOFF | VK_G1 | VK_NC,
+    VK_GOTOFF_G0     = VK_GOTOFF | VK_G0,
+    VK_GOTOFF_G0_NC  = VK_GOTOFF | VK_G0 | VK_NC,
+    VK_SEC_HI12      = VK_SEC | VK_HI12,
+    VK_SEC_LO12      = VK_SEC | VK_PAGEOFF,
+    VK_SEC_LO12_NC   = VK_SEC | VK_PAGEOFF | VK_NC,
+    VK_SEC_G3        = VK_SEC | VK_G3,
+    VK_SEC_G2        = VK_SEC | VK_G2,
+    VK_SEC_G2_NC     = VK_SEC | VK_G2 | VK_NC,
+    VK_SEC_G1        = VK_SEC | VK_G1,
+    VK_SEC_G1_NC     = VK_SEC | VK_G1 | VK_NC,
+    VK_SEC_G0        = VK_SEC | VK_G0,
+    VK_SEC_G0_NC     = VK_SEC | VK_G0 | VK_NC,
 
     VK_INVALID = 0xfff
   };
Index: lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
===================================================================
--- lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -72,6 +72,24 @@
   case VK_TLSDESC_PAGE: return ":tlsdesc:";
   case VK_SECREL_LO12: return ":secrel_lo12:";
   case VK_SECREL_HI12: return ":secrel_hi12:";
+  case VK_GOTOFF: return ":gotoff:";
+  case VK_GOTOFF_G3: return ":gotoff_g3:";
+  case VK_GOTOFF_G2: return ":gotoff_g2:";
+  case VK_GOTOFF_G2_NC: return ":gotoff_g2_nc:";
+  case VK_GOTOFF_G1: return ":gotoff_g1:";
+  case VK_GOTOFF_G1_NC: return ":gotoff_g1_nc:";
+  case VK_GOTOFF_G0: return ":gotoff_g0:";
+  case VK_GOTOFF_G0_NC: return ":gotoff_g0_nc:";
+  case VK_SEC_HI12: return ":sec_hi12:";
+  case VK_SEC_LO12: return ":sec_lo12:";
+  case VK_SEC: return ":sec:";
+  case VK_SEC_G3: return ":sec_g3:";
+  case VK_SEC_G2: return ":sec_g2:";
+  case VK_SEC_G2_NC: return ":sec_g2_nc:";
+  case VK_SEC_G1: return ":sec_g1:";
+  case VK_SEC_G1_NC: return ":sec_g1_nc:";
+  case VK_SEC_G0: return ":sec_g0:";
+  case VK_SEC_G0_NC: return ":sec_g0_nc:";
   default: llvm_unreachable("Invalid ELF symbol kind");
   }
Index: lib/Target/AArch64/Utils/AArch64BaseInfo.h
===================================================================
--- lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -507,7 +507,7 @@
   MO_NO_FLAG,
 
-  MO_FRAGMENT = 0xf,
+  MO_FRAGMENT = 0x7,
 
   /// MO_PAGE - A symbol operand with this flag represents the pc-relative
   /// offset of the 4K page containing the symbol. This is used with the
@@ -540,15 +540,21 @@
   /// by-12-bits instruction.
   MO_HI12 = 7,
 
+  /// MO_DLLIMPORT - On a symbol operand, this represents that the reference
+  /// to the symbol is for an import stub. This is used for DLL import
+  /// storage class indication on Windows.
+  MO_DLLIMPORT = 0x08,
+
+  MO_SOURCE = 0x70,
+
   /// MO_GOT - This flag indicates that a symbol operand represents the
   /// address of the GOT entry for the symbol, rather than the address of
   /// the symbol itself.
   MO_GOT = 0x10,
 
-  /// MO_NC - Indicates whether the linker is expected to check the symbol
-  /// reference for overflow. For example in an ADRP/ADD pair of relocations
-  /// the ADRP usually does check, but not the ADD.
-  MO_NC = 0x20,
+  MO_GOTOFF = 0x20,
+
+  MO_POT = 0x30,
 
   /// MO_TLS - Indicates that the operand being accessed is some kind of
   /// thread-local symbol. On Darwin, only one type of thread-local access
   /// referee will affect interpretation.
   MO_TLS = 0x40,
 
-  /// MO_DLLIMPORT - On a symbol operand, this represents that the reference
-  /// to the symbol is for an import stub. This is used for DLL import
-  /// storage class indication on Windows.
-  MO_DLLIMPORT = 0x80,
+  MO_SEC = 0x50,
+
+  /// MO_NC - Indicates whether the linker is expected to check the symbol
+  /// reference for overflow. For example in an ADRP/ADD pair of relocations
+  /// the ADRP usually does check, but not the ADD.
+  MO_NC = 0x80,
 };
 
 } // end namespace AArch64II
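
The reshuffled flag byte now packs two multi-valued fields plus two standalone bits. A sketch of the layout implied by the values above:

    // bit 7   bits 6-4    bit 3         bits 2-0
    // MO_NC   MO_SOURCE   MO_DLLIMPORT  MO_FRAGMENT
    unsigned Fragment(unsigned F)  { return F & 0x07; } // MO_PAGE .. MO_HI12
    unsigned Source(unsigned F)    { return F & 0x70; } // MO_GOT, MO_GOTOFF,
                                                        // MO_POT, MO_TLS, MO_SEC
    bool     DLLImport(unsigned F) { return F & 0x08; }
    bool     NoCheck(unsigned F)   { return F & 0x80; }

This is why the lowering code earlier in the patch masks with MO_SOURCE / MO_FRAGMENT instead of bit-testing individual flags.
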
Index: test/CodeGen/AArch64/pagerando-isel.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/pagerando-isel.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -march=aarch64 -relocation-model=pip -o /dev/null 2>&1 \
+; RUN:   -print-before=pagerando-optimizer-aarch64 | FileCheck %s
+
+@global_var = global i32 0
+@internal_var = internal global i32 0
+
+define void @legacy() { ret void }
+define void @wrapper() { ret void }
+define hidden void @binned() pagerando { ret void }
+
+; CHECK-LABEL: # *** IR Dump Before Pagerando intra-bin optimizer for AArch64 ***:
+; CHECK-LABEL: # Machine code for function user: IsSSA, TracksLiveness
+define void @user() pagerando {
+; CHECK-DAG: [[POT:%[0-9]+]]:gpr64 = COPY $x20
+; CHECK-DAG: [[GOT:%[0-9]+]]:gpr64common = LOADpot [[POT]]:gpr64, 0
+
+; CHECK-DAG: %{{[0-9]+}}:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc, ) @legacy, 0
+; CHECK-DAG: %{{[0-9]+}}:gpr64 = MOVKXi %{{[0-9]+}}:gpr64, target-flags(aarch64-g1, aarch64-nc, ) @legacy, 16
+; CHECK-DAG: %{{[0-9]+}}:gpr64 = MOVKXi %{{[0-9]+}}:gpr64, target-flags(aarch64-g2, aarch64-nc, ) @legacy, 32
+; CHECK-DAG: [[LEGACY_GOTOFF:%[0-9]+]]:gpr64 = MOVKXi %{{[0-9]+}}:gpr64, target-flags(aarch64-g3, ) @legacy, 48
+; CHECK: [[LEGACY:%[0-9]+]]:gpr64 = LOADgotr [[GOT]]:gpr64common, killed [[LEGACY_GOTOFF]]:gpr64
+; CHECK: BLR killed [[LEGACY]]:gpr64
+  call void @legacy()
+
+; CHECK: %{{[0-9]+}}:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc, ) @wrapper, 0
+; CHECK: %{{[0-9]+}}:gpr64 = MOVKXi %{{[0-9]+}}:gpr64, target-flags(aarch64-g1, aarch64-nc, ) @wrapper, 16
+; CHECK: %{{[0-9]+}}:gpr64 = MOVKXi %{{[0-9]+}}:gpr64, target-flags(aarch64-g2, aarch64-nc, ) @wrapper, 32
+; CHECK: [[WRAPPER_GOTOFF:%[0-9]+]]:gpr64 = MOVKXi %{{[0-9]+}}:gpr64, target-flags(aarch64-g3, ) @wrapper, 48
+; CHECK: [[WRAPPER:%[0-9]+]]:gpr64 = LOADgotr [[GOT]]:gpr64common, killed [[WRAPPER_GOTOFF]]:gpr64
+; CHECK: BLR killed [[WRAPPER]]
+  call void @wrapper()
+
+; CHECK: [[BINNED_BIN:%[0-9]+]]:gpr64 = LOADpot [[POT]]:gpr64, target-flags(aarch64-got, ) @binned
+; CHECK: [[BINNED:%[0-9]+]]:gpr64 = MOVaddrBIN killed [[BINNED_BIN]]:gpr64, target-flags(aarch64-got, aarch64-tls) @binned
+; CHECK: BLR killed [[BINNED]]:gpr64
+  call void @binned()
+
+; CHECK: %{{[0-9]+}}:gpr64 = MOVZXi target-flags(aarch64-g0, aarch64-nc, ) @global_var, 0
+; CHECK: %{{[0-9]+}}:gpr64 = MOVKXi %{{[0-9]+}}:gpr64, target-flags(aarch64-g1, aarch64-nc, ) @global_var, 16
+; CHECK: %{{[0-9]+}}:gpr64 = MOVKXi %{{[0-9]+}}:gpr64, target-flags(aarch64-g2, aarch64-nc, ) @global_var, 32
+; CHECK: [[GLOBAL_VAR_GOTOFF:%[0-9]+]]:gpr64 = MOVKXi %{{[0-9]+}}:gpr64, target-flags(aarch64-g3, ) @global_var, 48
+; CHECK: [[GLOBAL_VAR_ADDR:%[0-9]+]]:gpr64common = LOADgotr [[GOT]]:gpr64common, killed [[GLOBAL_VAR_GOTOFF]]:gpr64
+; CHECK: [[VAL:%[0-9]+]]:gpr32 = LDRWui killed [[GLOBAL_VAR_ADDR]]:gpr64common, 0
+  %val = load i32, i32* @global_var
+
+; CHECK: [[GOT_PCREL:%[0-9]+]]:gpr64 = MOVaddrEXT target-flags(aarch64-page) &_GLOBAL_OFFSET_TABLE_, target-flags(aarch64-pageoff, aarch64-nc) &_GLOBAL_OFFSET_TABLE_
+; CHECK: [[INTERNAL_VAR_PCREL:%[0-9]+]]:gpr64 = MOVaddr target-flags(aarch64-page) @internal_var, target-flags(aarch64-pageoff, aarch64-nc) @internal_var
+; CHECK: [[DIFF:%[0-9]+]]:gpr64 = SUBXrr killed [[INTERNAL_VAR_PCREL]]:gpr64, killed [[GOT_PCREL]]:gpr64
+; CHECK: STRWroX killed [[VAL]]:gpr32, [[GOT]]:gpr64common, killed [[DIFF]]:gpr64, 0, 0
+  store i32 %val, i32* @internal_var
+
+  ret void
+}
\ No newline at end of file
Index: test/CodeGen/AArch64/pagerando-optimizer.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/pagerando-optimizer.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=aarch64 -relocation-model=pip -o - | FileCheck %s
+
+; CHECK-LABEL: .text
+; CHECK-LABEL: wrapper:
+define void @wrapper() { ret void }
+
+; CHECK-LABEL: .section .text.bin_1
+; CHECK-LABEL: orig:
+define hidden void @orig() pagerando { ret void }
+
+; CHECK-LABEL: user:
+define void @user() pagerando {
+  call void @wrapper()
+
+; CHECK-NOT: .text.bin_1
+  call void @orig()
+
+  ret void
+}
+
+; CHECK-LABEL: .section .pot
+; CHECK-LABEL: _PAGE_OFFSET_TABLE_:
+; CHECK-NOT: .text.bin_1
\ No newline at end of file