Index: include/llvm/CodeGen/MachineConstantPool.h =================================================================== --- include/llvm/CodeGen/MachineConstantPool.h +++ include/llvm/CodeGen/MachineConstantPool.h @@ -144,6 +144,8 @@ unsigned getConstantPoolIndex(MachineConstantPoolValue *V, unsigned Alignment); + void eraseIndex(unsigned Index); + /// isEmpty - Return true if this constant pool contains no constants. bool isEmpty() const { return Constants.empty(); } Index: lib/CodeGen/MachineFunction.cpp =================================================================== --- lib/CodeGen/MachineFunction.cpp +++ lib/CodeGen/MachineFunction.cpp @@ -1066,6 +1066,11 @@ return Constants.size()-1; } +void MachineConstantPool::eraseIndex(unsigned Index) { + assert(Index < Constants.size() && "Index out of range"); + Constants.erase(Constants.begin()+Index); +} + void MachineConstantPool::print(raw_ostream &OS) const { if (Constants.empty()) return; Index: lib/Target/ARM/ARM.h =================================================================== --- lib/Target/ARM/ARM.h +++ lib/Target/ARM/ARM.h @@ -52,6 +52,7 @@ InstructionSelector * createARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI); +FunctionPass *createARMPagerandoOptimizerPass(); void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); @@ -67,6 +68,7 @@ void initializeARMConstantIslandsPass(PassRegistry &); void initializeARMExpandPseudoPass(PassRegistry &); void initializeThumb2SizeReducePass(PassRegistry &); +void initializeARMPagerandoOptimizerPass(PassRegistry &); } // end namespace llvm Index: lib/Target/ARM/ARMAsmPrinter.cpp =================================================================== --- lib/Target/ARM/ARMAsmPrinter.cpp +++ lib/Target/ARM/ARMAsmPrinter.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include 
"llvm/IR/Function.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -763,6 +764,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) { switch (Modifier) { case ARMCP::no_modifier: + case ARMCP::BINOFF: return MCSymbolRefExpr::VK_None; case ARMCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; @@ -776,6 +778,13 @@ return MCSymbolRefExpr::VK_ARM_GOT_PREL; case ARMCP::SECREL: return MCSymbolRefExpr::VK_SECREL; + case ARMCP::GOTOFF: + return MCSymbolRefExpr::VK_GOTOFF; + case ARMCP::GOT_BREL: + return MCSymbolRefExpr::VK_GOT; + case ARMCP::POTOFF: + // Unreachable, handled in EmitMachineConstantPoolValue + break; } llvm_unreachable("Invalid ARMCPModifier!"); } @@ -848,6 +857,21 @@ return EmitGlobalConstant(DL, ACPC->getPromotedGlobalInit()); } + if (ACPV->getModifier() == ARMCP::POTOFF) { + // This constant pool entry refers to an offset into the POT. Compute the + // bin of the target and emit the correct constant offset. + // + // To calculate this offset statically here, we need the entire POT to be + // laid out during code generation, which requires LTO in order to generate + // all code at once. Building the POT after code generation (i.e. with + // traditional linking) would require a new static relocation. + auto *F = cast(cast(ACPV)->getGV()); + + auto ConstantOffset = ConstantInt::get( + ACPV->getType(), GetPOTIndex(F)*DL.getPointerSize()); + return EmitGlobalConstant(DL, ConstantOffset); + } + MCSymbol *MCSym; if (ACPV->isLSDA()) { MCSym = getCurExceptionSym(); @@ -895,6 +919,14 @@ PCRelExpr = MCBinaryExpr::createSub(PCRelExpr, DotExpr, OutContext); } Expr = MCBinaryExpr::createSub(Expr, PCRelExpr, OutContext); + } else if (ACPV->getModifier() == ARMCP::BINOFF) { + // This constant pool entry refers to an offset from the start of a + // pagerando bin (segment). 
+ auto *F = cast(cast(ACPV)->getGV()); + + const MCSymbol *BinSym = GetSectionSymbol(F); + const MCExpr *BinExpr = MCSymbolRefExpr::create(BinSym, OutContext); + Expr = MCBinaryExpr::createSub(Expr, BinExpr, OutContext); } OutStreamer->EmitValue(Expr, Size); } @@ -930,7 +962,7 @@ // .word (LBB1 - LJTI_0_0) const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); - if (isPositionIndependent() || Subtarget->isROPI()) + if (isPositionIndependent() || Subtarget->isROPI() || Subtarget->isPIP()) Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(JTISymbol, OutContext), OutContext); Index: lib/Target/ARM/ARMConstantIslandPass.cpp =================================================================== --- lib/Target/ARM/ARMConstantIslandPass.cpp +++ lib/Target/ARM/ARMConstantIslandPass.cpp @@ -216,7 +216,7 @@ bool isThumb; bool isThumb1; bool isThumb2; - bool isPositionIndependentOrROPI; + bool isPositionIndependent_ROPI_PIP; public: static char ID; @@ -347,8 +347,9 @@ STI = &static_cast(MF->getSubtarget()); TII = STI->getInstrInfo(); - isPositionIndependentOrROPI = - STI->getTargetLowering()->isPositionIndependent() || STI->isROPI(); + isPositionIndependent_ROPI_PIP = + STI->getTargetLowering()->isPositionIndependent() || + STI->isROPI() || STI->isPIP(); AFI = MF->getInfo(); isThumb = AFI->isThumbFunction(); @@ -805,6 +806,7 @@ case ARM::t2LDRpci: case ARM::t2LDRHpci: case ARM::t2LDRBpci: + case ARM::t2PLDpci: Bits = 12; // +-offset_12 NegOk = true; break; @@ -2181,7 +2183,7 @@ if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI)) continue; - if (isPositionIndependentOrROPI) { + if (isPositionIndependent_ROPI_PIP) { MachineInstr *Add = Load->getNextNode(); if (Add->getOpcode() != ARM::tADDrr || Add->getOperand(2).getReg() != BaseReg || Index: lib/Target/ARM/ARMConstantPoolValue.h =================================================================== --- lib/Target/ARM/ARMConstantPoolValue.h +++ 
lib/Target/ARM/ARMConstantPoolValue.h @@ -52,6 +52,13 @@ TPOFF, /// Thread Pointer Offset SECREL, /// Section Relative (Windows TLS) SBREL, /// Static Base Relative (RWPI) + GOTOFF, /// Global Offset Table offset (TODO: Figure out if adding this + /// is ok) + GOT_BREL, /// Global Offset Table offset, generate a GOT entry (TODO: + /// Figure out if adding this is ok) + POTOFF, /// Offset in POT (TODO: Figure out if this should instead be a CPKind) + BINOFF, /// Offset from start of bin (segment) (TODO: Figure out if + /// this should instead be a CPKind) }; } // end namespace ARMCP @@ -219,7 +226,11 @@ public: static ARMConstantPoolSymbol *Create(LLVMContext &C, StringRef s, unsigned ID, - unsigned char PCAdj); + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier); + + static ARMConstantPoolSymbol *Create(LLVMContext &C, StringRef s, + ARMCP::ARMCPModifier Modifier); StringRef getSymbol() const { return S; } Index: lib/Target/ARM/ARMConstantPoolValue.cpp =================================================================== --- lib/Target/ARM/ARMConstantPoolValue.cpp +++ lib/Target/ARM/ARMConstantPoolValue.cpp @@ -68,6 +68,14 @@ return "SBREL"; case ARMCP::SECREL: return "secrel32"; + case ARMCP::GOTOFF: + return "gotoff"; + case ARMCP::GOT_BREL: + return "got_brel"; + case ARMCP::POTOFF: + return "potoff"; + case ARMCP::BINOFF: + return "binoff"; } llvm_unreachable("Unknown modifier!"); } @@ -81,6 +89,7 @@ ARMConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) { ID.AddInteger(LabelId); ID.AddInteger(PCAdjust); + ID.AddInteger(Modifier); } bool @@ -232,8 +241,15 @@ ARMConstantPoolSymbol *ARMConstantPoolSymbol::Create(LLVMContext &C, StringRef s, unsigned ID, - unsigned char PCAdj) { - return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false); + unsigned char PCAdj, + ARMCP::ARMCPModifier Modifier) { + return new ARMConstantPoolSymbol(C, s, ID, PCAdj, Modifier, false); +} + +ARMConstantPoolSymbol 
*ARMConstantPoolSymbol::Create(LLVMContext &C, + StringRef s, + ARMCP::ARMCPModifier Modifier) { + return new ARMConstantPoolSymbol(C, s, 0, 0, Modifier, false); } int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, Index: lib/Target/ARM/ARMFastISel.cpp =================================================================== --- lib/Target/ARM/ARMFastISel.cpp +++ lib/Target/ARM/ARMFastISel.cpp @@ -534,15 +534,15 @@ } bool ARMFastISel::isPositionIndependent() const { - return TLI.isPositionIndependent(); + return TLI.isPositionIndependent() || Subtarget->isPIP(); } unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { // For now 32-bit only. if (VT != MVT::i32 || GV->isThreadLocal()) return 0; - // ROPI/RWPI not currently supported. - if (Subtarget->isROPI() || Subtarget->isRWPI()) + // ROPI/RWPI/PIP not currently supported. + if (Subtarget->isROPI() || Subtarget->isRWPI() || Subtarget->isPIP()) return 0; bool IsIndirect = Subtarget->isGVIndirectSymbol(GV); @@ -2214,6 +2214,9 @@ CallingConv::ID CC = TLI.getLibcallCallingConv(Call); // Handle *simple* calls for now. + if (FuncInfo.MF->getFunction().isPagerando()) + return false; + Type *RetTy = I->getType(); MVT RetVT; if (RetTy->isVoidTy()) @@ -2311,6 +2314,11 @@ // Allow SelectionDAG isel to handle tail calls. if (CI->isTailCall()) return false; + // Can't handle PIP + const Function *F = dyn_cast(Callee); + if (FuncInfo.MF->getFunction().isPagerando() || (F && F->isPagerando())) + return false; + // Check the calling convention. ImmutableCallSite CS(CI); CallingConv::ID CC = CS.getCallingConv(); @@ -2961,11 +2969,16 @@ LLVMContext *Context = &MF->getFunction().getContext(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; - ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( - GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, - UseGOT_PREL ? 
ARMCP::GOT_PREL : ARMCP::no_modifier, - /*AddCurrentAddress=*/UseGOT_PREL); + ARMConstantPoolValue *CPV; + if (Subtarget->isPIP()) + CPV = ARMConstantPoolConstant::Create(GV, ARMCP::GOTOFF); + else { + unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + CPV = ARMConstantPoolConstant::Create( + GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, + UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier, + /*AddCurrentAddress=*/UseGOT_PREL); + } unsigned ConstAlign = MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context)); @@ -2980,25 +2993,46 @@ MIB.addImm(0); MIB.add(predOps(ARMCC::AL)); - // Fix the address by adding pc. unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); - Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR - : ARM::PICADD; - DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0); - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) - .addReg(TempReg) - .addImm(ARMPCLabelIndex); - if (!Subtarget->isThumb()) - MIB.add(predOps(ARMCC::AL)); + if (Subtarget->isPIP()) { + // Add the GOT address stored in POT[0] + unsigned POTReg = MF->addLiveIn(TLI.getPOTBaseRegister(), &ARM::rGPRRegClass); + + Address GOTAddr; + GOTAddr.BaseType = Address::RegBase; + GOTAddr.Base.Reg = POTReg; + GOTAddr.Offset = 0; + unsigned GOTReg; + bool RV = ARMEmitLoad(TLI.getPointerTy(DL), GOTReg, GOTAddr); + assert(RV && "Should be able to handle this load."); + (void)RV; - if (UseGOT_PREL && Subtarget->isThumb()) { - unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM::t2LDRi12), NewDestReg) - .addReg(DestReg) - .addImm(0); - DestReg = NewDestReg; - AddOptionalDefs(MIB); + Opc = isThumb2 ? 
ARM::t2ADDrr : ARM::ADDrr; + GOTReg = constrainOperandRegClass(TII.get(Opc), GOTReg, 0); + DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Opc), DestReg) + .addReg(GOTReg).addReg(TempReg)); + } else { + // Fix the address by adding pc. + Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR + : ARM::PICADD; + DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg) + .addReg(TempReg) + .addImm(ARMPCLabelIndex); + if (!Subtarget->isThumb()) + MIB.add(predOps(ARMCC::AL)); + + if (UseGOT_PREL && Subtarget->isThumb()) { + unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(ARM::t2LDRi12), NewDestReg) + .addReg(DestReg) + .addImm(0); + DestReg = NewDestReg; + AddOptionalDefs(MIB); + } } return DestReg; } Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -636,6 +636,8 @@ SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; + SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; @@ -651,6 +653,7 @@ SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerPOT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) 
const; SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -948,6 +948,7 @@ setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); + setOperationAction(ISD::PAGE_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::TRAP, MVT::Other, Legal); @@ -1146,6 +1147,9 @@ setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal); } + if (Subtarget->isPIP()) + setPOTBaseRegister(ARM::R9); + // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); @@ -2007,7 +2011,39 @@ ARMFunctionInfo *AFI = MF.getInfo(); auto PtrVt = getPointerTy(DAG.getDataLayout()); - if (Subtarget->genLongCalls()) { + auto F = dyn_cast_or_null(GV); + if (auto *GA = dyn_cast_or_null(GV)) { + if (auto *Aliasee = dyn_cast(GA->getAliasee())) + F = dyn_cast(Aliasee); + } + bool UsePIPAddressing = MF.getFunction().isPagerando() || + (F && F->isPagerando()); + if (UsePIPAddressing) { + if (GV) { + Callee = LowerGlobalAddressELF(Callee, DAG); + } else if (ExternalSymbolSDNode *S=dyn_cast(Callee)) { + SDValue POTValue = DAG.getNode(ISD::PAGE_OFFSET_TABLE, dl, + DAG.getVTList(MVT::i32, MVT::Other)); + SDValue GOTAddr = DAG.getLoad( + PtrVt, dl, POTValue.getValue(1), POTValue, + MachinePointerInfo::getPOT(DAG.getMachineFunction())); + + ARMConstantPoolValue *CPV = + ARMConstantPoolSymbol::Create(*DAG.getContext(), S->getSymbol(), + ARMCP::GOT_BREL); + // Get the address of the callee into a register + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Offset 
= DAG.getLoad( + PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + + Callee = DAG.getNode(ISD::ADD, dl, PtrVt, GOTAddr, Offset); + Callee = + DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + } + } else if (Subtarget->genLongCalls()) { assert((!isPositionIndependent() || Subtarget->isTargetWindows()) && "long-calls codegen is not position independent!"); // Handle a global address or an external symbol. If it's not one of @@ -2335,6 +2371,22 @@ return false; } + // Calls to pagerando functions from non-pagerando (legacy) functions must + // initialize the POT register, which is callee-saved. Thus we need to restore + // the original value of the POT register after the call and cannot tail call. + if (!CallerF.isPagerando()) { + if (auto *G = dyn_cast(Callee)) { + auto *GV = G->getGlobal(); + auto F = dyn_cast(GV); + if (auto *GA = dyn_cast(GV)) { + if (auto *Aliasee = dyn_cast(GA->getAliasee())) + F = dyn_cast(Aliasee); + } + if (F && F->isPagerando()) + return false; + } + } + // Check that the call results are passed in the same way. 
LLVMContext &C = *DAG.getContext(); if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, @@ -2765,7 +2817,8 @@ EVT PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast(Op)->getBlockAddress(); SDValue CPAddr; - bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI(); + bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI() || + Subtarget->isPIP(); if (!IsPositionIndependent) { CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); } else { @@ -3031,6 +3084,44 @@ llvm_unreachable("bogus TLS model"); } +SDValue +ARMTargetLowering::LowerPOT(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isPIP() && + "POT lowering only supported with PIP relocation model"); + + SDLoc dl(Op); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + MachineFunction &MF = DAG.getMachineFunction(); + unsigned POTReg = getPOTBaseRegister(); + if (MF.getFunction().isPagerando()) { + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, POTReg, PtrVT); + } else { + // Need to materialize the POT address + ARMFunctionInfo *AFI = MF.getInfo(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; + ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create( + *DAG.getContext(), "_PAGE_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj, + ARMCP::GOT_PREL); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + SDValue Chain = Result.getValue(1); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); + SDValue POTAddress = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + POTAddress = + DAG.getLoad(PtrVT, dl, Chain, POTAddress, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + Chain = POTAddress.getValue(1); + + Chain = DAG.getCopyToReg(Chain, dl, POTReg, POTAddress); + SDValue Ops[2] = { POTAddress, Chain }; + return DAG.getMergeValues(Ops, dl); + } +} + /// Return true if all users of V are within function F, looking through /// ConstantExprs. static bool allUsersAreInFunction(const Value *V, const Function *F) { @@ -3207,13 +3298,67 @@ const GlobalValue *GV = cast(Op)->getGlobal(); const TargetMachine &TM = getTargetMachine(); bool IsRO = isReadOnly(GV); + bool UseGOT = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); // promoteToConstantPool only if not generating XO text section - if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly()) + if (!UseGOT && !Subtarget->genExecuteOnly()) if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl)) return V; - if (isPositionIndependent()) { + MachineFunction &MF = DAG.getMachineFunction(); + auto F = dyn_cast(GV); + if (auto *GA = dyn_cast(GV)) { + if (auto *Aliasee = dyn_cast(GA->getAliasee())) + F = dyn_cast(Aliasee); + } + bool pagerandoBinTarget = F && F->isPagerando(); + if (MF.getFunction().isPagerando() || pagerandoBinTarget) { + // Position-independent pages, access through the POT + // TODO: Add support for MOVT/W + + SDValue BaseAddr; + 
ARMConstantPoolValue *OffsetCPV; + SDValue POTValue = DAG.getNode(ISD::PAGE_OFFSET_TABLE, dl, + DAG.getVTList(MVT::i32, MVT::Other)); + SDValue Chain = POTValue.getValue(1); + if (pagerandoBinTarget) { + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( + F, ARMCP::POTOFF); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue POTOffset = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + + SDValue POTAddr = DAG.getNode(ISD::ADD, dl, PtrVT, POTValue, POTOffset); + BaseAddr = DAG.getLoad( + PtrVT, dl, Chain, POTAddr, + MachinePointerInfo::getPOT(DAG.getMachineFunction())); + + OffsetCPV = + ARMConstantPoolConstant::Create(F, ARMCP::BINOFF); + } else { + BaseAddr = DAG.getLoad( + PtrVT, dl, Chain, POTValue, + MachinePointerInfo::getPOT(DAG.getMachineFunction())); + + OffsetCPV = + ARMConstantPoolConstant::Create(GV, UseGOT ? ARMCP::GOT_BREL : ARMCP::GOTOFF); + } + + SDValue OffsetCPAddr = DAG.getTargetConstantPool(OffsetCPV, PtrVT, 4); + OffsetCPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, OffsetCPAddr); + SDValue Offset = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), OffsetCPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); + + SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, BaseAddr, Offset); + if (UseGOT) + Result = + DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + return Result; + } else if (isPositionIndependent() || Subtarget->isPIP()) { bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, UseGOT_PREL ? 
ARMII::MO_GOT : 0); @@ -3270,6 +3415,8 @@ SelectionDAG &DAG) const { assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && "ROPI/RWPI not currently supported for Darwin"); + assert(!Subtarget->isPIP() && + "PIP not currently supported for Darwin"); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast(Op)->getGlobal(); @@ -3298,6 +3445,8 @@ "Windows on ARM expects to use movw/movt"); assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && "ROPI/RWPI not currently supported for Windows"); + assert(!Subtarget->isPIP() && + "PIP not currently supported for Windows"); const GlobalValue *GV = cast(Op)->getGlobal(); const ARMII::TOF TargetFlags = @@ -4806,7 +4955,7 @@ return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, Addr, Op.getOperand(2), JTI); } - if (isPositionIndependent() || Subtarget->isROPI()) { + if (isPositionIndependent() || Subtarget->isROPI() || Subtarget->isPIP()) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); @@ -8067,6 +8216,7 @@ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::PAGE_OFFSET_TABLE: return LowerPOT(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); @@ -8232,6 +8382,8 @@ int FI) const { assert(!Subtarget->isROPI() && !Subtarget->isRWPI() && "ROPI/RWPI not currently supported with SjLj"); + assert(!Subtarget->isPIP() && + "FIXME: PIP not currently supported with SjLj"); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); MachineFunction *MF = MBB->getParent(); Index: lib/Target/ARM/ARMPagerandoOptimizer.cpp =================================================================== --- /dev/null +++ lib/Target/ARM/ARMPagerandoOptimizer.cpp @@ -0,0 +1,299 
@@
+//===-- ARMPagerandoOptimizer.cpp - Optimizes intra-bin function calls ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes calls inside the same position-independent bin to direct
+// calls to avoid the overhead of indirect calls through the POT.
+// The implementation relies on SSA form to follow def-use chains, therefore,
+// this pass must be scheduled before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pagerando"
+
+namespace {
+/// Machine-function pass that rewrites calls to pagerando functions in the
+/// caller's own bin (same section prefix) so they no longer go through the POT.
+class ARMPagerandoOptimizer : public MachineFunctionPass {
+public:
+  static char ID;
+  explicit ARMPagerandoOptimizer() : MachineFunctionPass(ID) {
+    initializeARMPagerandoOptimizerPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::TracksLiveness);
+  }
+
+private:
+  void optimizeCalls(MachineInstr *MI, const Function *Callee);
+  void replaceWithDirectCall(MachineInstr *MI, const Function *Callee);
+  void changeToPCRelativeCall(MachineInstr *MI, const Function *Callee);
+  void deleteCPEntries(MachineFunction &MF, const SmallSet<int, 8> &CPIndices);
+};
+} // end anonymous namespace
+
+char ARMPagerandoOptimizer::ID = 0;
+INITIALIZE_PASS(ARMPagerandoOptimizer, "pagerando-optimizer-arm",
+                "Pagerando intra-bin optimizer for ARM", false, false)
+
+FunctionPass *llvm::createARMPagerandoOptimizerPass() {
+  return new ARMPagerandoOptimizer();
+}
+
+/// Returns true if \p E is a POTOFF/BINOFF constant-pool entry that refers to
+/// a function whose section prefix equals \p BinPrefix.
+static bool isIntraBin(const MachineConstantPoolEntry &E, StringRef BinPrefix) {
+  if (!E.isMachineConstantPoolEntry()) return false;
+
+  // ARMConstantPoolValue lacks casting infrastructure to use dyn_cast directly
+  auto *CPV = static_cast<ARMConstantPoolValue *>(E.Val.MachineCPVal);
+  auto *CPC = dyn_cast<ARMConstantPoolConstant>(CPV);
+  if (!CPC) return false;
+
+  auto M = CPC->getModifier();
+  auto *F = dyn_cast_or_null<Function>(CPC->getGV());
+
+  return (M == ARMCP::POTOFF || M == ARMCP::BINOFF)
+      && F && F->getSectionPrefix() == BinPrefix;
+}
+
+/// Returns the function referenced by \p E. Only valid for entries accepted by
+/// isIntraBin().
+static const Function *getCallee(const MachineConstantPoolEntry &E) {
+  auto *CPC = static_cast<ARMConstantPoolConstant *>(E.Val.MachineCPVal);
+  return cast<Function>(CPC->getGV());
+}
+
+/// Returns the constant-pool index loaded by \p MI, or -1 if \p MI is not a
+/// load whose operand 1 is a constant-pool index.
+static int getCPIndex(const MachineInstr &MI) {
+  if (MI.mayLoad() && MI.getNumOperands() > 1 && MI.getOperand(1).isCPI())
+    return MI.getOperand(1).getIndex();
+  return -1;
+}
+
+bool ARMPagerandoOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  auto &F = MF.getFunction();
+  // This pass is an optimization (optional), therefore check skipFunction
+  if (!F.isPagerando() || skipFunction(F)) {
+    return false;
+  }
+
+  // Section prefix is assigned by PagerandoBinning pass. Without one there is
+  // no bin to match against, so leave the function untouched.
+  auto Prefix = F.getSectionPrefix();
+  if (!Prefix.hasValue())
+    return false;
+  auto BinPrefix = Prefix.getValue();
+  auto &CPEntries = MF.getConstantPool()->getConstants();
+
+  // Find intra-bin CP entries
+  SmallSet<int, 8> CPIndices;
+  int Index = 0;
+  for (auto &E : CPEntries) {
+    if (isIntraBin(E, BinPrefix))
+      CPIndices.insert(Index);
+    Index++;
+  }
+
+  if (CPIndices.empty())
+    return false;
+
+  // Collect uses of intra-bin CP entries
+  std::vector<MachineInstr *> Uses;
+  for (auto &BB : MF) {
+    for (auto &MI : BB) {
+      int Index = getCPIndex(MI);
+      if (CPIndices.count(Index))
+        Uses.push_back(&MI);
+    }
+  }
+
+  // Optimize intra-bin calls
+  for (auto *MI : Uses) {
+    int Index = getCPIndex(*MI);
+    auto *Callee = getCallee(CPEntries[Index]);
+    optimizeCalls(MI, Callee);
+  }
+
+  deleteCPEntries(MF, CPIndices);
+
+  return true;
+}
+
+static bool isBXCall(unsigned Opc) {
+  return Opc == ARM::BX_CALL || Opc == ARM::tBX_CALL;
+}
+
+/// Walks the def-use chain that starts at the constant-pool load \p MI. Every
+/// non-call instruction on the chain is erased; every call reached is rewritten
+/// to target \p Callee directly.
+// NOTE(review): non-call users are erased unconditionally, so this presumably
+// relies on the chain feeding nothing but calls (e.g. no store of the callee's
+// address) -- verify against the lowering that creates these CP entries.
+void ARMPagerandoOptimizer::optimizeCalls(MachineInstr *MI,
+                                          const Function *Callee) {
+  auto &MRI = MI->getParent()->getParent()->getRegInfo();
+
+  // Seen keeps an instruction from being enqueued twice (a user can be reached
+  // through more than one def); a second visit would touch an instruction that
+  // has already been erased or replaced.
+  SmallPtrSet<MachineInstr *, 8> Seen;
+  Seen.insert(MI);
+  SmallVector<MachineInstr *, 4> Queue{MI};
+  while (!Queue.empty()) {
+    MI = Queue.pop_back_val();
+
+    if (!MI->isCall()) { // Not a call, enqueue users
+      for (auto &Op : MI->defs()) {
+        for (auto &User : MRI.use_instructions(Op.getReg())) {
+          if (Seen.insert(&User).second)
+            Queue.push_back(&User);
+        }
+      }
+      MI->eraseFromParent();
+    } else if (isBXCall(MI->getOpcode())) {
+      changeToPCRelativeCall(MI, Callee);
+    } else { // Standard indirect call
+      replaceWithDirectCall(MI, Callee);
+    }
+  }
+}
+
+static unsigned toDirectCall(unsigned Opc) {
+  switch (Opc) {
+  case ARM::BLX:        return ARM::BL;
+  case ARM::tBLXr:      return ARM::tBL;
+  case ARM::TCRETURNri: return ARM::TCRETURNdi;
+  default:
+    llvm_unreachable("Unhandled ARM call opcode");
+  }
+}
+
+/// Replaces the indirect call \p MI with the direct-call opcode given by
+/// toDirectCall(), targeting \p Callee, and erases \p MI.
+void ARMPagerandoOptimizer::replaceWithDirectCall(MachineInstr *MI,
+                                                  const Function *Callee) {
+  auto &MBB = *MI->getParent();
+  auto &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
+
+  auto Opc = toDirectCall(MI->getOpcode());
+  auto MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TII.get(Opc));
+
+  int SkipOps = 1;
+  if (MI->getOpcode() == ARM::tBLXr) { // Short instruction
+    auto CondOp = predOps(ARMCC::AL);
+    MIB.add(CondOp);
+    SkipOps += CondOp.size();
+  }
+  MIB.addGlobalAddress(Callee);
+
+  // Copy over remaining operands
+  auto RemainingOps = make_range(MI->operands_begin() + SkipOps,
+                                 MI->operands_end());
+  for (auto &Op : RemainingOps)
+    MIB.add(Op);
+
+  MI->eraseFromParent();
+}
+
+// Replace indirect register operand with more efficient PC-relative access
+void ARMPagerandoOptimizer::changeToPCRelativeCall(MachineInstr *MI,
+                                                   const Function *Callee) {
+  auto &MBB = *MI->getParent();
+  auto &MF = *MBB.getParent();
+  auto &C = MF.getFunction().getContext();
+  auto &AFI = *MF.getInfo<ARMFunctionInfo>();
+  auto &TII = *MF.getSubtarget().getInstrInfo();
+  auto &TLI = *MF.getSubtarget().getTargetLowering();
+  auto &DL = MF.getDataLayout();
+  auto &MRI = MF.getRegInfo();
+  auto isThumb = AFI.isThumbFunction();
+
+  // Create updated CP entry for callee
+  auto Label = AFI.createPICLabelUId();
+  auto PCAdj = isThumb ? 4 : 8;
+  auto *CPV = ARMConstantPoolConstant::Create(
+      Callee, Label, ARMCP::CPValue, PCAdj, ARMCP::no_modifier, false);
+  auto Alignment = DL.getPrefTypeAlignment(Type::getInt32PtrTy(C));
+  auto Index = MF.getConstantPool()->getConstantPoolIndex(CPV, Alignment);
+
+  // Load callee offset into register
+  auto Opc = AFI.isThumb2Function() ? ARM::t2LDRpci : ARM::LDRcp;
+  auto OffsetReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
+  auto MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TII.get(Opc), OffsetReg)
+      .addConstantPoolIndex(Index);
+  if (Opc == ARM::LDRcp) MIB.addImm(0);
+  MIB.add(predOps(ARMCC::AL));
+
+  // Compute callee address by adding PC
+  // FIXME: this is ugly
+  auto RegClass = TLI.getRegClassFor(TLI.getPointerTy(DL));
+  auto AddressReg = MRI.createVirtualRegister(RegClass);
+  Opc = isThumb ? ARM::tPICADD : ARM::PICADD;
+  MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TII.get(Opc), AddressReg)
+      .addReg(OffsetReg)
+      .addImm(Label);
+  if (!isThumb) MIB.add(predOps(ARMCC::AL));
+
+  // Replace register operand
+  MI->getOperand(0).setReg(AddressReg);
+}
+
+/// Removes the constant-pool entries listed in \p CPIndices and renumbers every
+/// remaining constant-pool operand in \p MF to match.
+void ARMPagerandoOptimizer::deleteCPEntries(MachineFunction &MF,
+                                            const SmallSet<int, 8> &CPIndices) {
+  auto *CP = MF.getConstantPool();
+  int Size = CP->getConstants().size();
+  // Owned by a container rather than a raw new[]/delete[] pair.
+  SmallVector<int, 16> Indices(Size);
+
+  // Create CP index mapping: Indices[Old] -> New
+  for (int Old = 0, New = 0; Old < Size; ++Old) {
+    Indices[Old] = CPIndices.count(Old) ? -1 : New++;
+  }
+
+  // Update remaining (inter-bin) CP references
+  for (auto &BB : MF) {
+    for (auto &MI : BB) {
+      for (auto &Op : MI.explicit_uses()) {
+        if (Op.isCPI()) {
+          int Old = Op.getIndex();
+          int New = Indices[Old];
+          assert (New != -1 && "CP entry use should have been deleted");
+          Op.setIndex(New);
+        }
+      }
+    }
+  }
+
+  // Delete now unreferenced (intra-bin) CP entries (in reverse order so
+  // deletion does not affect the index of future deletions)
+  for (int Old = Size - 1; Old >= 0; --Old) {
+    if (Indices[Old] == -1)
+      CP->eraseIndex(Old);
+  }
+}
Index: lib/Target/ARM/ARMSubtarget.h =================================================================== --- lib/Target/ARM/ARMSubtarget.h +++ lib/Target/ARM/ARMSubtarget.h @@ -682,6 +682,7 @@ bool isROPI() const; bool isRWPI() const; + bool isPIP() const; bool useMachineScheduler() const { return UseMISched; } bool disablePostRAScheduler() const { return DisablePostRAScheduler; } Index: lib/Target/ARM/ARMSubtarget.cpp =================================================================== --- lib/Target/ARM/ARMSubtarget.cpp +++ lib/Target/ARM/ARMSubtarget.cpp @@ -247,7 +247,7 @@ (Options.UnsafeFPMath || isTargetDarwin())) UseNEONForSinglePrecisionFP = true; - if (isRWPI()) + if (isRWPI() || isPIP()) ReserveR9 = true; // FIXME: Teach TableGen to deal with these instead of doing it manually here. 
@@ -326,6 +326,9 @@ return TM.getRelocationModel() == Reloc::RWPI || TM.getRelocationModel() == Reloc::ROPI_RWPI; }
+/// Return true if the target machine's relocation model is Reloc::PIP.
+bool ARMSubtarget::isPIP() const {
+  const auto Model = TM.getRelocationModel();
+  return Model == Reloc::PIP;
+}
 bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) Index: lib/Target/ARM/ARMTargetMachine.cpp =================================================================== --- lib/Target/ARM/ARMTargetMachine.cpp +++ lib/Target/ARM/ARMTargetMachine.cpp @@ -94,6 +94,7 @@ initializeARMExecutionDomainFixPass(Registry); initializeARMExpandPseudoPass(Registry); initializeThumb2SizeReducePass(Registry); + initializeARMPagerandoOptimizerPass(Registry); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -186,6 +187,10 @@ assert(TT.isOSBinFormatELF() && "ROPI/RWPI currently only supported for ELF"); + if (*RM == Reloc::PIP) + assert(TT.isOSBinFormatELF() && + "PIP currently only supported for ELF"); + // DynamicNoPIC is only used on darwin. if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin()) return Reloc::Static; @@ -455,6 +460,9 @@ void ARMPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { + if (TM->isPagerando()) + addPass(createARMPagerandoOptimizerPass()); + addPass(createMLxExpansionPass()); if (EnableARMLoadStoreOpt) Index: lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.h +++ lib/Target/ARM/ARMTargetTransformInfo.h @@ -178,7 +178,7 @@ // In the ROPI and RWPI relocation models we can't have pointers to global // variables or functions in constant data, so don't convert switches to // lookup tables if any of the values would need relocation.
- if (ST->isROPI() || ST->isRWPI()) + if (ST->isROPI() || ST->isRWPI() || ST->isPIP()) return !C->needsRelocation(); return true; Index: lib/Target/ARM/CMakeLists.txt =================================================================== --- lib/Target/ARM/CMakeLists.txt +++ lib/Target/ARM/CMakeLists.txt @@ -39,6 +39,7 @@ ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp ARMMacroFusion.cpp + ARMPagerandoOptimizer.cpp ARMRegisterInfo.cpp ARMOptimizeBarriersPass.cpp ARMRegisterBankInfo.cpp Index: test/CodeGen/ARM/pagerando-isel.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/pagerando-isel.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple=armv7-linux -relocation-model=pip -o /dev/null 2>&1 \ +; RUN: -print-before=pagerando-optimizer-arm | FileCheck %s + +@global_var = global i32 0 +@internal_var = internal global i32 0 + +define void @legacy() { ret void } +define void @wrapper() { ret void } +define hidden void @binned() pagerando { ret void } + +; CHECK-LABEL: # *** IR Dump Before Pagerando intra-bin optimizer for ARM ***: +; CHECK-LABEL: # Machine code for function user: IsSSA, TracksLiveness +; CHECK-LABEL: Constant Pool: +; CHECK: cp#0: legacy(got_brel), align=4 +; CHECK: cp#1: wrapper(got_brel), align=4 +; CHECK: cp#2: binned(potoff), align=4 +; CHECK: cp#3: binned(binoff), align=4 +; CHECK: cp#4: global_var(got_brel), align=4 +; CHECK: cp#5: internal_var(gotoff), align=4 + +; CHECK-LABEL: bb.0 (%ir-block.0) +; CHECK-NEXT: [[LEGACY_GOT:%[0-9]+]]:gprnopc = LDRi12 %const.0, 0, 14, $noreg :: (load 4 from constant-pool) +define void @user() pagerando { +; CHECK-DAG: [[POT:%[0-9]+]]:gpr = COPY $r9 +; CHECK-DAG: [[GOT:%[0-9]+]]:gpr = LDRi12 [[POT]]:gpr, 0, 14, $noreg :: (load 4 from pot) +; CHECK-DAG: [[LEGACY:%[0-9]+]]:gpr = LDRrs [[GOT]]:gpr, killed [[LEGACY_GOT]]:gprnopc, 0, 14, $noreg :: (load 4 from got) + +; CHECK: BLX killed [[LEGACY]]:gpr + call void @legacy() + +; CHECK: [[WRAPPER_GOT:%[0-9]+]]:gprnopc = LDRi12 
%const.1, 0, 14, $noreg :: (load 4 from constant-pool) +; CHECK: [[WRAPPER:%[0-9]+]]:gpr = LDRrs [[GOT]]:gpr, killed [[WRAPPER_GOT]] +; CHECK: BLX killed [[WRAPPER]]:gpr + call void @wrapper() +; CHECK: [[BINNED_POTOFF:%[0-9]+]]:gprnopc = LDRi12 %const.2, 0, 14, $noreg :: (load 4 from constant-pool) +; CHECK: [[BINNED_BIN:%[0-9]+]]:gpr = LDRrs [[POT]]:gpr, killed [[BINNED_POTOFF]]:gprnopc, 0, 14, $noreg +; CHECK: [[BINNED_BINOFF:%[0-9]+]]:gpr = LDRi12 %const.3, 0, 14, $noreg :: (load 4 from constant-pool) +; CHECK: [[BINNED:%[0-9]+]]:gpr = ADDrr killed [[BINNED_BIN]]:gpr, killed [[BINNED_BINOFF]]:gpr, 14, $noreg, $noreg +; CHECK: BLX killed [[BINNED]]:gpr + call void @binned() + +; CHECK: [[GLOBAL_VAR_GOT:%[0-9]+]]:gprnopc = LDRi12 %const.4, 0, 14, $noreg :: (load 4 from constant-pool) +; CHECK: [[GLOBAL_VAR:%[0-9]+]]:gpr = LDRrs [[GOT]]:gpr, killed [[GLOBAL_VAR_GOT]]:gprnopc, 0, 14, $noreg +; CHECK-DAG: [[VAL:%[0-9]+]]:gpr = LDRi12 killed [[GLOBAL_VAR]]:gpr, 0, 14, $noreg + %val = load i32, i32* @global_var + +; CHECK-DAG: [[INTERNAL_VAR_GOTOFF:%[0-9]+]]:gprnopc = LDRi12 %const.5, 0, 14, $noreg :: (load 4 from constant-pool) +; CHECK: STRrs killed [[VAL]]:gpr, [[GOT]]:gpr, killed [[INTERNAL_VAR_GOTOFF]]:gprnopc, 0, 14, $noreg + store i32 %val, i32* @internal_var + + ret void +} Index: test/CodeGen/ARM/pagerando-optimizer.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/pagerando-optimizer.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mtriple=armv7-linux -relocation-model=pip -o - | FileCheck %s + +; CHECK-LABEL: .text +; CHECK-LABEL: wrapper: +define void @wrapper() { ret void } + +; CHECK-LABEL: .section .text.bin_1 +; CHECK-LABEL: orig: +define hidden void @orig() pagerando { ret void } + +; CHECK-LABEL: user: +define void @user() pagerando { + call void @wrapper() + +; CHECK-NOT: add +; CHECK: bl orig + call void @orig() + + ret void + +; Should not appear in the constant pool of this function. 
+; CHECK-NOT: .text.bin_1 +} + +; CHECK-LABEL: .section .pot +; CHECK-LABEL: _PAGE_OFFSET_TABLE_: +; CHECK-NOT: .text.bin_1 \ No newline at end of file