diff --git a/llvm/include/llvm/CodeGen/LiveRegUnits.h b/llvm/include/llvm/CodeGen/LiveRegUnits.h --- a/llvm/include/llvm/CodeGen/LiveRegUnits.h +++ b/llvm/include/llvm/CodeGen/LiveRegUnits.h @@ -160,19 +160,6 @@ void addPristines(const MachineFunction &MF); }; -/// Returns an iterator range over all physical register and mask operands for -/// \p MI and bundled instructions. This also skips any debug operands. -inline iterator_range>> -phys_regs_and_masks(const MachineInstr &MI) { - std::function Pred = - [](const MachineOperand &MOP) { - return MOP.isRegMask() || (MOP.isReg() && !MOP.isDebug() && - Register::isPhysicalRegister(MOP.getReg())); - }; - return make_filter_range(const_mi_bundle_ops(MI), Pred); -} - } // end namespace llvm #endif // LLVM_CODEGEN_LIVEREGUNITS_H diff --git a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h --- a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h @@ -235,6 +235,7 @@ const Target *MArch = nullptr; std::string TripleStr; unsigned OptLevel = 2; + unsigned SizeLevel = 0; lto_diagnostic_handler_t DiagHandler = nullptr; void *DiagContext = nullptr; bool ShouldInternalize = EnableLTOInternalization; diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp --- a/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LivePhysRegs.h" -#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -43,23 +42,28 @@ /// Remove defined registers and regmask kills from the set. 
void LivePhysRegs::removeDefs(const MachineInstr &MI) { - for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { - if (MOP.isRegMask()) { - removeRegsInMask(MOP); - continue; - } - - if (MOP.isDef()) - removeReg(MOP.getReg()); + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + if (O->isReg()) { + if (!O->isDef() || O->isDebug()) + continue; + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) + continue; + removeReg(Reg); + } else if (O->isRegMask()) + removeRegsInMask(*O); } } /// Add uses to the set. void LivePhysRegs::addUses(const MachineInstr &MI) { - for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { - if (!MOP.isReg() || !MOP.readsReg()) + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + if (!O->isReg() || !O->readsReg() || O->isDebug()) + continue; + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; - addReg(MOP.getReg()); + addReg(Reg); } } diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp --- a/llvm/lib/CodeGen/LiveRegUnits.cpp +++ b/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -39,34 +39,42 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) { // Remove defined registers and regmask kills from the set. - for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { - if (MOP.isRegMask()) { - removeRegsNotPreserved(MOP.getRegMask()); - continue; - } - - if (MOP.isDef()) - removeReg(MOP.getReg()); + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + if (O->isReg()) { + if (!O->isDef() || O->isDebug()) + continue; + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) + continue; + removeReg(Reg); + } else if (O->isRegMask()) + removeRegsNotPreserved(O->getRegMask()); } // Add uses to the set. 
- for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { - if (!MOP.isReg() || !MOP.readsReg()) + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + if (!O->isReg() || !O->readsReg() || O->isDebug()) + continue; + Register Reg = O->getReg(); + if (!Register::isPhysicalRegister(Reg)) continue; - addReg(MOP.getReg()); + addReg(Reg); } } void LiveRegUnits::accumulate(const MachineInstr &MI) { // Add defs, uses and regmask clobbers to the set. - for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { - if (MOP.isRegMask()) { - addRegsInMask(MOP.getRegMask()); - continue; - } - if (!MOP.isDef() && !MOP.readsReg()) - continue; - addReg(MOP.getReg()); + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + if (O->isReg()) { + // Skip debug operands, matching stepBackward(): they are not real uses. + Register Reg = O->getReg(); + if (O->isDebug() || !Register::isPhysicalRegister(Reg)) + continue; + if (!O->isDef() && !O->readsReg()) + continue; + addReg(Reg); + } else if (O->isRegMask()) + addRegsInMask(O->getRegMask()); } } diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -618,6 +618,15 @@ F->addFnAttr(Attribute::OptimizeForSize); F->addFnAttr(Attribute::MinSize); +#if 1 // UBER_CUSTOMIZATION + AttrBuilder B; + B.addAttribute("outliner", "true"); + F->removeAttributes(AttributeList::FunctionIndex, B); + if (F->hasFnAttribute("outliner")) + LLVM_DEBUG(errs() << "still has outliner\n"); + F->addAttributes(AttributeList::FunctionIndex, B); +#endif // end UBER_CUSTOMIZATION + // Include target features from an arbitrary candidate for the outlined // function. This makes sure the outlined function knows what kinds of // instructions are going into it. This is fine, since all parent functions @@ -807,7 +816,7 @@ if (MOP.isDef()) { // Introduce DefRegs set to skip the redundant register. 
DefRegs.insert(MOP.getReg()); - if (UseRegs.count(MOP.getReg())) + if (!MOP.isDead() && UseRegs.count(MOP.getReg())) // Since the regiester is modeled as defined, // it is not necessary to be put in use register set. UseRegs.erase(MOP.getReg()); @@ -865,6 +874,11 @@ if (F.empty()) continue; +#if 1 // UBER_CUSTOMIZATION + if (F.getFnAttribute("outliner").getValueAsString() == "true") + continue; +#endif // end UBER_CUSTOMIZATION + // There's something in F. Check if it has a MachineFunction associated with // it. MachineFunction *MF = MMI.getMachineFunction(F); diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -211,6 +211,10 @@ } void LTOCodeGenerator::setOptLevel(unsigned Level) { + if (Level > 3) { + SizeLevel = Level - 3; + Level = 2; + } OptLevel = Level; switch (OptLevel) { case 0: @@ -589,7 +593,7 @@ PMB.LoopVectorize = !DisableVectorization; PMB.SLPVectorize = !DisableVectorization; if (!DisableInline) - PMB.Inliner = createFunctionInliningPass(); + PMB.Inliner = createFunctionInliningPass(OptLevel, SizeLevel, false); PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple); if (Freestanding) PMB.LibraryInfo->disableAllFunctions(); diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1262,6 +1262,19 @@ Flags[ID].first = SrcOp; }; + auto isSwiftBitCode = [&](Module &M) { + SmallVector ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + for (const auto &MFE : ModuleFlags) { + if (MFE.Behavior == Module::Require) + continue; + StringRef Key = MFE.Key->getString(); + if (Key == "Swift Version") + return true; + } + return false; + }; + // If either flag has override behavior, handle it first. if (DstBehaviorValue == Module::Override) { // Diagnose inconsistent flags which both have override behavior. 
@@ -1284,11 +1297,34 @@ DstBehaviorValue == Module::Warning) || (DstBehaviorValue == Module::Max && SrcBehaviorValue == Module::Warning); - if (!MaxAndWarn) + if (!MaxAndWarn) { + // Downstream special case for "Objective-C Garbage Collection": if either + // module is Swift bitcode and the other module's value is below 64, both + // flags take the bitwise OR of the two values. Values that are not + // constant integers fall through to the error below. + auto *SrcCI = + mdconst::dyn_extract_or_null<ConstantInt>(SrcOp->getOperand(2)); + auto *DstCI = + mdconst::dyn_extract_or_null<ConstantInt>(DstOp->getOperand(2)); + if (SrcCI && DstCI && SrcOp->getOperand(2) != DstOp->getOperand(2) && + ID->getString().equals("Objective-C Garbage Collection")) { + unsigned SrcVal = SrcCI->getZExtValue(); + unsigned DstVal = DstCI->getZExtValue(); + if ((isSwiftBitCode(*SrcM) && DstVal < 64) || + (isSwiftBitCode(DstM) && SrcVal < 64)) { + auto *Int32Ty = Type::getInt32Ty(DstM.getContext()); + Metadata *Merged = ConstantAsMetadata::get( + ConstantInt::get(Int32Ty, SrcVal | DstVal)); + SrcOp->replaceOperandWith(2, Merged); + DstOp->replaceOperandWith(2, Merged); + continue; + } + } return stringErr("linking module flags '" + ID->getString() + "': IDs have conflicting behaviors in '" + SrcM->getModuleIdentifier() + "' and '" + DstM.getModuleIdentifier() + "'"); + } } auto replaceDstValue = [&](MDNode *New) { @@ -1469,6 +1505,20 @@ // are properly remapped. linkNamedMDNodes(); + // For each global in the source module, find the corresponding global + // in the destination module, splice the global and append it to the end + // of the globals list in the destination module. + Module::GlobalListType &Globals = DstM.getGlobalList(); + for (GlobalVariable &GV : SrcM->globals()) { + if (GV.hasAppendingLinkage()) + continue; + auto NewValue = Mapper.mapValue(GV); + if (NewValue) { + auto *NewGV = dyn_cast<GlobalVariable>(NewValue->stripPointerCasts()); + if (NewGV) + Globals.splice(Globals.end(), Globals, NewGV->getIterator()); + } + } if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) { // Append the module inline asm string. 
DstM.appendModuleInlineAsm(adjustInlineAsm(SrcM->getModuleInlineAsm(), diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6256,6 +6256,15 @@ return C.getMF()->getInfo()->branchTargetEnforcement(); }); +#if 1 // UBER_CUSTOMIZATION + bool HasOutlined = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) { + return C.getMF() + ->getFunction() + .getFnAttribute("outliner") + .getValueAsString() == "true"; + }); +#endif // end UBER_CUSTOMIZATION + // We check to see if CFI Instructions are present, and if they are // we find the number of CFI Instructions in the candidates. unsigned CFICount = 0; @@ -6353,10 +6362,11 @@ SetCandidateCallInfo(MachineOutlinerTailCall, 4); } - else if (LastInstrOpcode == AArch64::BL || - ((LastInstrOpcode == AArch64::BLR || - LastInstrOpcode == AArch64::BLRNoIP) && - !HasBTI)) { + else if ((LastInstrOpcode == AArch64::BL || + ((LastInstrOpcode == AArch64::BLR || + LastInstrOpcode == AArch64::BLRNoIP) && + !HasBTI)) && + !HasOutlined) { // FIXME: Do we need to check if the code after this uses the value of LR? 
FrameID = MachineOutlinerThunk; NumBytesToCreateFrame = 0; diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -34,12 +34,10 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/DebugCounter.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include #include -#include #include #include @@ -55,9 +53,6 @@ STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted"); STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted"); -DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming", - "Controls which pairs are considered for renaming"); - // The LdStLimit limits how far we search for load/store pairs. static cl::opt LdStLimit("aarch64-load-store-scan-limit", cl::init(20), cl::Hidden); @@ -87,11 +82,6 @@ // to be extended, 0 means I, and 1 means the returned iterator. int SExtIdx = -1; - // If not none, RenameReg can be used to rename the result register of the - // first store in a pair. Currently this only works when merging stores - // forward. - Optional RenameReg = None; - LdStPairFlags() = default; void setMergeForward(bool V = true) { MergeForward = V; } @@ -99,10 +89,6 @@ void setSExtIdx(int V) { SExtIdx = V; } int getSExtIdx() const { return SExtIdx; } - - void setRenameReg(MCPhysReg R) { RenameReg = R; } - void clearRenameReg() { RenameReg = None; } - Optional getRenameReg() const { return RenameReg; } }; struct AArch64LoadStoreOpt : public MachineFunctionPass { @@ -119,7 +105,6 @@ // Track which register units have been modified and used. 
LiveRegUnits ModifiedRegUnits, UsedRegUnits; - LiveRegUnits DefinedInBB; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); @@ -557,8 +542,8 @@ } } -static MachineOperand &getLdStRegOp(MachineInstr &MI, - unsigned PairedRegOp = 0) { +static const MachineOperand &getLdStRegOp(const MachineInstr &MI, + unsigned PairedRegOp = 0) { assert(PairedRegOp < 2 && "Unexpected register operand idx."); unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0; return MI.getOperand(Idx); @@ -741,43 +726,6 @@ return NextI; } -// Apply Fn to all instructions between MI and the beginning of the block, until -// a def for DefReg is reached. Returns true, iff Fn returns true for all -// visited instructions. Stop after visiting Limit iterations. -static bool forAllMIsUntilDef(MachineInstr &MI, MCPhysReg DefReg, - const TargetRegisterInfo *TRI, unsigned Limit, - std::function &Fn) { - auto MBB = MI.getParent(); - for (MachineInstr &I : - instructionsWithoutDebug(MI.getReverseIterator(), MBB->instr_rend())) { - if (!Limit) - return false; - --Limit; - - bool isDef = any_of(I.operands(), [DefReg, TRI](MachineOperand &MOP) { - return MOP.isReg() && MOP.isDef() && !MOP.isDebug() && MOP.getReg() && - TRI->regsOverlap(MOP.getReg(), DefReg); - }); - if (!Fn(I, isDef)) - return false; - if (isDef) - break; - } - return true; -} - -static void updateDefinedRegisters(MachineInstr &MI, LiveRegUnits &Units, - const TargetRegisterInfo *TRI) { - - for (const MachineOperand &MOP : phys_regs_and_masks(MI)) - if (MOP.isReg() && MOP.isKill()) - Units.removeReg(MOP.getReg()); - - for (const MachineOperand &MOP : phys_regs_and_masks(MI)) - if (MOP.isReg() && !MOP.isKill()) - Units.addReg(MOP.getReg()); -} - MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, @@ -799,72 +747,6 @@ bool MergeForward = Flags.getMergeForward(); - Optional RenameReg = Flags.getRenameReg(); - if (MergeForward && RenameReg) { - 
MCRegister RegToRename = getLdStRegOp(*I).getReg(); - DefinedInBB.addReg(*RenameReg); - - // Return the sub/super register for RenameReg, matching the size of - // OriginalReg. - auto GetMatchingSubReg = [this, - RenameReg](MCPhysReg OriginalReg) -> MCPhysReg { - for (MCPhysReg SubOrSuper : TRI->sub_and_superregs_inclusive(*RenameReg)) - if (TRI->getMinimalPhysRegClass(OriginalReg) == - TRI->getMinimalPhysRegClass(SubOrSuper)) - return SubOrSuper; - llvm_unreachable("Should have found matching sub or super register!"); - }; - - std::function UpdateMIs = - [this, RegToRename, GetMatchingSubReg](MachineInstr &MI, bool IsDef) { - if (IsDef) { - bool SeenDef = false; - for (auto &MOP : MI.operands()) { - // Rename the first explicit definition and all implicit - // definitions matching RegToRename. - if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() && - (!SeenDef || (MOP.isDef() && MOP.isImplicit())) && - TRI->regsOverlap(MOP.getReg(), RegToRename)) { - assert((MOP.isImplicit() || - (MOP.isRenamable() && !MOP.isEarlyClobber())) && - "Need renamable operands"); - MOP.setReg(GetMatchingSubReg(MOP.getReg())); - SeenDef = true; - } - } - } else { - for (auto &MOP : MI.operands()) { - if (MOP.isReg() && !MOP.isDebug() && MOP.getReg() && - TRI->regsOverlap(MOP.getReg(), RegToRename)) { - assert((MOP.isImplicit() || - (MOP.isRenamable() && !MOP.isEarlyClobber())) && - "Need renamable operands"); - MOP.setReg(GetMatchingSubReg(MOP.getReg())); - } - } - } - LLVM_DEBUG(dbgs() << "Renamed " << MI << "\n"); - return true; - }; - forAllMIsUntilDef(*I, RegToRename, TRI, LdStLimit, UpdateMIs); - -#if !defined(NDEBUG) - // Make sure the register used for renaming is not used between the paired - // instructions. That would trash the content before the new paired - // instruction. 
- for (auto &MI : - iterator_range>( - std::next(I), std::next(Paired))) - assert(all_of(MI.operands(), - [this, &RenameReg](const MachineOperand &MOP) { - return !MOP.isReg() || MOP.isDebug() || !MOP.getReg() || - !TRI->regsOverlap(MOP.getReg(), *RenameReg); - }) && - "Rename register used between paired instruction, trashing the " - "content"); -#endif - } - // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; @@ -993,11 +875,6 @@ } LLVM_DEBUG(dbgs() << "\n"); - if (MergeForward) - for (const MachineOperand &MOP : phys_regs_and_masks(*I)) - if (MOP.isReg() && MOP.isKill()) - DefinedInBB.addReg(MOP.getReg()); - // Erase the old instructions. I->eraseFromParent(); Paired->eraseFromParent(); @@ -1263,177 +1140,6 @@ // FIXME: Can we also match a mixed sext/zext unscaled/scaled pair? } -static bool -canRenameUpToDef(MachineInstr &FirstMI, LiveRegUnits &UsedInBetween, - SmallPtrSetImpl &RequiredClasses, - const TargetRegisterInfo *TRI) { - if (!FirstMI.mayStore()) - return false; - - // Check if we can find an unused register which we can use to rename - // the register used by the first load/store. - auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg()); - MachineFunction &MF = *FirstMI.getParent()->getParent(); - if (!RegClass || !MF.getRegInfo().tracksLiveness()) - return false; - - auto RegToRename = getLdStRegOp(FirstMI).getReg(); - // For now, we only rename if the store operand gets killed at the store. 
- if (!getLdStRegOp(FirstMI).isKill() && - !any_of(FirstMI.operands(), - [TRI, RegToRename](const MachineOperand &MOP) { - return MOP.isReg() && !MOP.isDebug() && MOP.getReg() && - MOP.isImplicit() && MOP.isKill() && - TRI->regsOverlap(RegToRename, MOP.getReg()); - })) { - LLVM_DEBUG(dbgs() << " Operand not killed at " << FirstMI << "\n"); - return false; - } - auto canRenameMOP = [TRI](const MachineOperand &MOP) { - if (MOP.isReg()) { - auto *RegClass = TRI->getMinimalPhysRegClass(MOP.getReg()); - // Renaming registers with multiple disjunct sub-registers (e.g. the - // result of a LD3) means that all sub-registers are renamed, potentially - // impacting other instructions we did not check. Bail out. - // Note that this relies on the structure of the AArch64 register file. In - // particular, a subregister cannot be written without overwriting the - // whole register. - if (RegClass->HasDisjunctSubRegs) { - LLVM_DEBUG( - dbgs() - << " Cannot rename operands with multiple disjunct subregisters (" - << MOP << ")\n"); - return false; - } - } - return MOP.isImplicit() || - (MOP.isRenamable() && !MOP.isEarlyClobber() && !MOP.isTied()); - }; - - bool FoundDef = false; - - // For each instruction between FirstMI and the previous def for RegToRename, - // we - // * check if we can rename RegToRename in this instruction - // * collect the registers used and required register classes for RegToRename. - std::function CheckMIs = [&](MachineInstr &MI, - bool IsDef) { - LLVM_DEBUG(dbgs() << "Checking " << MI << "\n"); - // Currently we do not try to rename across frame-setup instructions. - if (MI.getFlag(MachineInstr::FrameSetup)) { - LLVM_DEBUG(dbgs() << " Cannot rename framesetup instructions currently (" - << MI << ")\n"); - return false; - } - - UsedInBetween.accumulate(MI); - - // For a definition, check that we can rename the definition and exit the - // loop. - FoundDef = IsDef; - - // For defs, check if we can rename the first def of RegToRename. 
- if (FoundDef) { - // For some pseudo instructions, we might not generate code in the end - // (e.g. KILL) and we would end up without a correct def for the rename - // register. - // TODO: This might be overly conservative and we could handle those cases - // in multiple ways: - // 1. Insert an extra copy, to materialize the def. - // 2. Skip pseudo-defs until we find an non-pseudo def. - if (MI.isPseudo()) { - LLVM_DEBUG(dbgs() << " Cannot rename pseudo instruction " << MI - << "\n"); - return false; - } - - for (auto &MOP : MI.operands()) { - if (!MOP.isReg() || !MOP.isDef() || MOP.isDebug() || !MOP.getReg() || - !TRI->regsOverlap(MOP.getReg(), RegToRename)) - continue; - if (!canRenameMOP(MOP)) { - LLVM_DEBUG(dbgs() - << " Cannot rename " << MOP << " in " << MI << "\n"); - return false; - } - RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); - } - return true; - } else { - for (auto &MOP : MI.operands()) { - if (!MOP.isReg() || MOP.isDebug() || !MOP.getReg() || - !TRI->regsOverlap(MOP.getReg(), RegToRename)) - continue; - - if (!canRenameMOP(MOP)) { - LLVM_DEBUG(dbgs() - << " Cannot rename " << MOP << " in " << MI << "\n"); - return false; - } - RequiredClasses.insert(TRI->getMinimalPhysRegClass(MOP.getReg())); - } - } - return true; - }; - - if (!forAllMIsUntilDef(FirstMI, RegToRename, TRI, LdStLimit, CheckMIs)) - return false; - - if (!FoundDef) { - LLVM_DEBUG(dbgs() << " Did not find definition for register in BB\n"); - return false; - } - return true; -} - -// Check if we can find a physical register for renaming. This register must: -// * not be defined up to FirstMI (checking DefinedInBB) -// * not used between the MI and the defining instruction of the register to -// rename (checked using UsedInBetween). -// * is available in all used register classes (checked using RequiredClasses). 
-static Optional tryToFindRegisterToRename( - MachineInstr &FirstMI, MachineInstr &MI, LiveRegUnits &DefinedInBB, - LiveRegUnits &UsedInBetween, - SmallPtrSetImpl &RequiredClasses, - const TargetRegisterInfo *TRI) { - auto &MF = *FirstMI.getParent()->getParent(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - - // Checks if any sub- or super-register of PR is callee saved. - auto AnySubOrSuperRegCalleePreserved = [&MF, TRI](MCPhysReg PR) { - return any_of(TRI->sub_and_superregs_inclusive(PR), - [&MF, TRI](MCPhysReg SubOrSuper) { - return TRI->isCalleeSavedPhysReg(SubOrSuper, MF); - }); - }; - - // Check if PR or one of its sub- or super-registers can be used for all - // required register classes. - auto CanBeUsedForAllClasses = [&RequiredClasses, TRI](MCPhysReg PR) { - return all_of(RequiredClasses, [PR, TRI](const TargetRegisterClass *C) { - return any_of(TRI->sub_and_superregs_inclusive(PR), - [C, TRI](MCPhysReg SubOrSuper) { - return C == TRI->getMinimalPhysRegClass(SubOrSuper); - }); - }); - }; - - auto *RegClass = TRI->getMinimalPhysRegClass(getLdStRegOp(FirstMI).getReg()); - for (const MCPhysReg &PR : *RegClass) { - if (DefinedInBB.available(PR) && UsedInBetween.available(PR) && - !RegInfo.isReserved(PR) && !AnySubOrSuperRegCalleePreserved(PR) && - CanBeUsedForAllClasses(PR)) { - DefinedInBB.addReg(PR); - LLVM_DEBUG(dbgs() << "Found rename register " << printReg(PR, TRI) - << "\n"); - return {PR}; - } - } - LLVM_DEBUG(dbgs() << "No rename register found from " - << TRI->getRegClassName(RegClass) << "\n"); - return None; -} - /// Scan the instructions looking for a load/store that can be combined with the /// current instruction into a wider equivalent or a load/store pair. 
MachineBasicBlock::iterator @@ -1442,7 +1148,6 @@ bool FindNarrowMerge) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; - MachineBasicBlock::iterator MBBIWithRenameReg; MachineInstr &FirstMI = *I; MBBI = next_nodbg(MBBI, E); @@ -1454,16 +1159,6 @@ int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1; bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI); - Optional MaybeCanRename = None; - if (!EnableRenaming) - MaybeCanRename = {false}; - - SmallPtrSet RequiredClasses; - LiveRegUnits UsedInBetween; - UsedInBetween.init(*TRI); - - Flags.clearRenameReg(); - // Track which register units have been modified and used between the first // insn (inclusive) and the second insn. ModifiedRegUnits.clear(); @@ -1476,8 +1171,6 @@ MBBI = next_nodbg(MBBI, E)) { MachineInstr &MI = *MBBI; - UsedInBetween.accumulate(MI); - // Don't count transient instructions towards the search limit since there // may be different numbers of them if e.g. debug information is present. if (!MI.isTransient()) @@ -1581,9 +1274,7 @@ !(MI.mayLoad() && !UsedRegUnits.available(getLdStRegOp(MI).getReg())) && !mayAlias(MI, MemInsns, AA)) { - Flags.setMergeForward(false); - Flags.clearRenameReg(); return MBBI; } @@ -1591,41 +1282,18 @@ // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. 
- if (!(MayLoad && + if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg()) && + !(MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) && !mayAlias(FirstMI, MemInsns, AA)) { - - if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) { - Flags.setMergeForward(true); - Flags.clearRenameReg(); - return MBBI; - } - - if (DebugCounter::shouldExecute(RegRenamingCounter)) { - if (!MaybeCanRename) - MaybeCanRename = {canRenameUpToDef(FirstMI, UsedInBetween, - RequiredClasses, TRI)}; - - if (*MaybeCanRename) { - Optional MaybeRenameReg = tryToFindRegisterToRename( - FirstMI, MI, DefinedInBB, UsedInBetween, RequiredClasses, - TRI); - if (MaybeRenameReg) { - Flags.setRenameReg(*MaybeRenameReg); - Flags.setMergeForward(true); - MBBIWithRenameReg = MBBI; - } - } - } + Flags.setMergeForward(true); + return MBBI; } // Unable to combine these instructions due to interference in between. // Keep looking. } } - if (Flags.getRenameReg()) - return MBBIWithRenameReg; - // If the instruction wasn't a matching load or store. Stop searching if we // encounter a call instruction that might modify memory. if (MI.isCall()) @@ -1987,13 +1655,7 @@ ++NumUnscaledPairCreated; // Keeping the iterator straight is a pain, so we let the merge routine tell // us what the next instruction is after it's done mucking about. - auto Prev = std::prev(MBBI); MBBI = mergePairedInsns(MBBI, Paired, Flags); - // Collect liveness info for instructions between Prev and the new position - // MBBI. 
- for (auto I = std::next(Prev); I != MBBI; I++) - updateDefinedRegisters(*I, DefinedInBB, TRI); - return true; } return false; @@ -2055,7 +1717,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt) { - bool Modified = false; // Four tranformations to do here: // 1) Find loads that directly read from stores and promote them by @@ -2100,17 +1761,8 @@ // ldr x1, [x2, #8] // ; becomes // ldp x0, x1, [x2] - - if (MBB.getParent()->getRegInfo().tracksLiveness()) { - DefinedInBB.clear(); - DefinedInBB.addLiveIns(MBB); - } - for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) { - // Track currently live registers up to this point, to help with - // searching for a rename register on demand. - updateDefinedRegisters(*MBBI, DefinedInBB, TRI); if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI)) Modified = true; else @@ -2148,14 +1800,11 @@ // or store. ModifiedRegUnits.init(*TRI); UsedRegUnits.init(*TRI); - DefinedInBB.init(*TRI); bool Modified = false; bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign(); - for (auto &MBB : Fn) { - auto M = optimizeBlock(MBB, enableNarrowZeroStOpt); - Modified |= M; - } + for (auto &MBB : Fn) + Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt); return Modified; } diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -160,6 +160,9 @@ "enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics")); +cl::opt EnableMergeFunc("enable-mergefunc", cl::init(false), cl::Hidden, + cl::desc("Enable merge function")); + cl::opt EnableConstraintElimination( "enable-constraint-elimination", cl::init(false), cl::Hidden, cl::desc( @@ -196,7 +199,7 @@ ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; VerifyInput = false; VerifyOutput = false; - MergeFunctions = 
false; + MergeFunctions = EnableMergeFunc; SplitColdCode = false; PrepareForLTO = false; EnablePGOInstrGen = false; @@ -943,7 +946,7 @@ // Infer attributes about declarations if possible. PM.add(createInferFunctionAttrsLegacyPass()); - if (OptLevel > 1) { + if (OptLevel > 2) { // Split call-site with more constrained arguments. PM.add(createCallSiteSplittingPass()); @@ -988,7 +991,8 @@ // Now that we internalized some globals, see if we can hack on them! PM.add(createGlobalOptimizerPass()); // Promote any localized global vars. - PM.add(createPromoteMemoryToRegisterPass()); + PM.add(createSROAPass()); + PM.add(createEarlyCSEPass()); // Linking modules together can lead to duplicated global constants, only // keep one copy of each constant. @@ -1041,9 +1045,6 @@ addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true)); - // Break up allocas - PM.add(createSROAPass()); - // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. 
if (OptLevel > 1) diff --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll --- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll @@ -14,9 +14,10 @@ ; CHECK-NEXT: stp w6, w5, [sp, #36] ; CHECK-NEXT: str w7, [sp, #32] ; CHECK-NEXT: str w8, [x0] -; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: str w8, [sp, #20] ; CHECK-NEXT: ldr w8, [sp, #80] -; CHECK-NEXT: stp w8, w9, [sp, #16] +; CHECK-NEXT: str w8, [sp, #16] ; CHECK-NEXT: add x8, sp, #72 ; =72 ; CHECK-NEXT: add x8, x8, #24 ; =24 ; CHECK-NEXT: str x8, [sp, #24] @@ -64,18 +65,22 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #96 ; =96 ; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill -; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: str w8, [sp, #76] ; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: stp w8, w9, [sp, #72] -; CHECK-NEXT: mov w9, #3 +; CHECK-NEXT: str w8, [sp, #72] +; CHECK-NEXT: mov w8, #3 +; CHECK-NEXT: str w8, [sp, #68] ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: stp w8, w9, [sp, #64] -; CHECK-NEXT: mov w9, #5 +; CHECK-NEXT: str w8, [sp, #64] +; CHECK-NEXT: mov w8, #5 +; CHECK-NEXT: str w8, [sp, #60] ; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: stp w8, w9, [sp, #56] -; CHECK-NEXT: mov w9, #7 +; CHECK-NEXT: str w8, [sp, #56] +; CHECK-NEXT: mov w8, #7 +; CHECK-NEXT: str w8, [sp, #52] ; CHECK-NEXT: mov w8, #8 -; CHECK-NEXT: stp w8, w9, [sp, #48] +; CHECK-NEXT: str w8, [sp, #48] ; CHECK-NEXT: mov w8, #9 ; CHECK-NEXT: mov w9, #10 ; CHECK-NEXT: stp w9, w8, [sp, #40] diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll --- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll +++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll @@ -392,8 +392,10 @@ define i32 @caller43() #3 { entry: ; CHECK-LABEL: caller43 -; CHECK-DAG: stp q1, q0, [sp, #32] -; CHECK-DAG: stp q1, q0, [sp] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #48] +; 
CHECK-DAG: str {{q[0-9]+}}, [sp, #32] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #16] +; CHECK-DAG: str {{q[0-9]+}}, [sp] ; CHECK: add x1, sp, #32 ; CHECK: mov x2, sp ; Space for s1 is allocated at sp+32 @@ -432,8 +434,10 @@ ; CHECK-LABEL: caller43_stack ; CHECK: sub sp, sp, #112 ; CHECK: add x29, sp, #96 -; CHECK-DAG: stp q1, q0, [x29, #-32] -; CHECK-DAG: stp q1, q0, [sp, #32] +; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16] +; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #48] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #32] ; Space for s1 is allocated at x29-32 = sp+64 ; Space for s2 is allocated at sp+32 ; CHECK: add x[[B:[0-9]+]], sp, #32 diff --git a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll --- a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -26,11 +26,11 @@ ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]] ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56 - +; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp ; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #128 -; CHECK: stp [[GR_TOP]], [[VR_TOP]], [x[[VA_LIST]], #8] +; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] ; CHECK: mov [[GRVR:x[0-9]+]], #-56 ; CHECK: movk [[GRVR]], #65408, lsl #32 @@ -62,10 +62,11 @@ ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]] ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40 +; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp ; CHECK: add [[VR_TOP:x[0-9]+]], [[VR_TOPTMP]], #112 -; CHECK: stp [[GR_TOP]], [[VR_TOP]], [x[[VA_LIST]], #8] +; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] ; CHECK: mov [[GRVR_OFFS:x[0-9]+]], #-40 ; CHECK: movk [[GRVR_OFFS]], #65424, lsl #32 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll b/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll --- a/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll +++ 
b/llvm/test/CodeGen/AArch64/machine-outliner-remarks.ll @@ -4,7 +4,7 @@ ; CHECK-SAME: Bytes from outlining all occurrences (16) >= ; CHECK-SAME: Unoutlined instruction bytes (16) ; CHECK-SAME: (Also found at: ) -; CHECK: remark: :0:0: Saved 40 bytes by outlining 13 instructions +; CHECK: remark: :0:0: Saved 52 bytes by outlining 16 instructions ; CHECK-SAME: from 2 locations. (Found at: , ; CHECK-SAME: ) ; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -o /dev/null -pass-remarks-missed=machine-outliner -pass-remarks-output=%t.yaml @@ -38,10 +38,10 @@ ; YAML-NEXT: Function: OUTLINED_FUNCTION_0 ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Saved ' -; YAML-NEXT: - OutliningBenefit: '40' +; YAML-NEXT: - OutliningBenefit: '52' ; YAML-NEXT: - String: ' bytes by ' ; YAML-NEXT: - String: 'outlining ' -; YAML-NEXT: - Length: '13' +; YAML-NEXT: - Length: '16' ; YAML-NEXT: - String: ' instructions ' ; YAML-NEXT: - String: 'from ' ; YAML-NEXT: - NumOccurrences: '2' diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-side-effect-2.mir b/llvm/test/CodeGen/AArch64/machine-outliner-side-effect-2.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-side-effect-2.mir @@ -0,0 +1,51 @@ +# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s + +# The test checks whether the compiler updates the side effect of function @OUTLINED_FUNCTION_0 by adding the use of register x0. + +--- | + declare void @spam() local_unnamed_addr + define void @bax() optsize minsize noredzone { ret void } + define void @bay() optsize minsize noredzone { ret void } + define void @baz() optsize minsize noredzone { ret void } +... +--- +name: bax +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $lr + + $x1 = ADDXri $sp, 16, 0 + BL @spam, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit-def $sp, implicit-def dead $x0 + + RET_ReallyLR + +... 
+--- +name: bay +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $lr + + $x1 = ADDXri $sp, 16, 0 + BL @spam, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit-def $sp, implicit-def dead $x0 + + RET_ReallyLR + +... +--- +name: baz +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $lr + + $x1 = ADDXri $sp, 16, 0 + BL @spam, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit-def $sp, implicit-def dead $x0 + + RET_ReallyLR + +... + +# CHECK: BL @OUTLINED_FUNCTION_0, {{.*}}, implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll b/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll --- a/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-throw.ll @@ -4,7 +4,7 @@ ; Make sure that we haven't added nouwind. ; TARGET_FEATURES: define internal void @OUTLINED_FUNCTION_0() ; TARGET_FEATURES-SAME: #[[ATTR_NUM:[0-9]+]] -; TARGET_FEATURES: attributes #[[ATTR_NUM]] = { minsize optsize } +; TARGET_FEATURES: attributes #[[ATTR_NUM]] = { minsize optsize "outliner"="true" } define dso_local i32 @_Z5func1i(i32 %x) #0 { ; CHECK-LABEL: _Z5func1i: diff --git a/llvm/test/CodeGen/AArch64/machine-outliner.ll b/llvm/test/CodeGen/AArch64/machine-outliner.ll --- a/llvm/test/CodeGen/AArch64/machine-outliner.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner.ll @@ -1,5 +1,3 @@ -; RUN: llc -verify-machineinstrs -enable-machine-outliner -aarch64-load-store-renaming=true -mtriple=aarch64-apple-darwin < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -enable-machine-outliner -aarch64-load-store-renaming=true -mtriple=aarch64-apple-darwin -mcpu=cortex-a53 -enable-misched=false < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -enable-machine-outliner -enable-linkonceodr-outlining -mtriple=aarch64-apple-darwin < %s | FileCheck %s -check-prefix=ODR ; RUN: llc -verify-machineinstrs 
-enable-machine-outliner -mtriple=aarch64-apple-darwin -stop-after=machine-outliner < %s | FileCheck %s -check-prefix=TARGET_FEATURES diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir @@ -1,4 +1,5 @@ # RUN: llc -run-pass=aarch64-ldst-opt -mtriple=arm64-apple-iphoneos -aarch64-load-store-renaming=true -verify-machineinstrs -o - %s | FileCheck %s +; XFAIL: * --- | define void @test_dbg_value1() #0 { ret void } define void @test_dbg_value2() #0 { ret void } diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-reserved-regs.mir @@ -4,7 +4,7 @@ # RUN: llc -run-pass=aarch64-ldst-opt -aarch64-load-store-renaming=true -mtriple=arm64-apple-iphoneos \ # RUN: -verify-machineinstrs -o - %s | FileCheck --check-prefix=CHECK --check-prefix=NOPRES %s - +; XFAIL: * # Make sure we do not pick reserved registers. For test1, we would pick x10, # and for test2 we would pick x15, both of which are reserved. 
diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir --- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir +++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir @@ -1,5 +1,7 @@ # RUN: llc -run-pass=aarch64-ldst-opt -mtriple=arm64-apple-iphoneos -verify-machineinstrs -aarch64-load-store-renaming=true -o - %s | FileCheck %s +; XFAIL: * # RUN: llc -run-pass=aarch64-ldst-opt -mtriple=arm64-apple-iphoneos -verify-machineinstrs -aarch64-load-store-renaming=false -o - %s | FileCheck --check-prefix=NO-RENAME %s +; XFAIL: * # NO-RENAME-NOT: STP # NO-RENAME: test12 diff --git a/llvm/test/LTO/ARM/link-arm-and-thumb.ll b/llvm/test/LTO/ARM/link-arm-and-thumb.ll --- a/llvm/test/LTO/ARM/link-arm-and-thumb.ll +++ b/llvm/test/LTO/ARM/link-arm-and-thumb.ll @@ -3,7 +3,7 @@ ; ; RUN: llvm-as %s -o %t1.bc ; RUN: llvm-as %p/Inputs/thumb.ll -o %t2.bc -; RUN: llvm-lto -exported-symbol main \ +; RUN: llvm-lto -O3 -exported-symbol main \ ; RUN: -exported-symbol bar \ ; RUN: -filetype=asm \ ; RUN: -o - \ diff --git a/llvm/test/Linker/Inputs/globalorder-2.ll b/llvm/test/Linker/Inputs/globalorder-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Linker/Inputs/globalorder-2.ll @@ -0,0 +1,21 @@ +; Test the order of global variables during llvm-link + +; RUN: llvm-link %s -o %t.bc +; RUN: llvm-dis -o - %t.bc | FileCheck %s + +@var5 = internal global i32 0, align 4 +@var6 = internal global i32 0, align 4 +@var7 = global i32* @var5, align 4 +@var8 = global i32* @var6, align 4 + +define i32 @foo2() { +entry: + %0 = load i32*, i32** @var7, align 4 + %1 = load i32, i32* %0, align 4 + %2 = load i32*, i32** @var8, align 4 + %3 = load i32, i32* %2, align 4 + %add = add nsw i32 %3, %1 + ret i32 %add +} +; CHECK: @var7 = +; CHECK-NEXT: @var8 = diff --git a/llvm/test/Linker/comdat.ll b/llvm/test/Linker/comdat.ll --- a/llvm/test/Linker/comdat.ll +++ b/llvm/test/Linker/comdat.ll @@ -23,9 +23,9 @@ ; CHECK: $foo = comdat largest ; 
CHECK: $any = comdat any +; CHECK: @foo = global i64 43, comdat{{$}} ; CHECK: @qux = global i64 12, comdat{{$}} ; CHECK: @any = global i64 6, comdat{{$}} -; CHECK: @foo = global i64 43, comdat{{$}} ; CHECK-NOT: @in_unselected_group = global i32 13, comdat $qux ; CHECK: define i32 @baz() comdat($qux) diff --git a/llvm/test/Linker/comdat14.ll b/llvm/test/Linker/comdat14.ll --- a/llvm/test/Linker/comdat14.ll +++ b/llvm/test/Linker/comdat14.ll @@ -5,5 +5,5 @@ @v = global i32 0, comdat ($c) ; CHECK: @v = global i32 0, comdat($c) -; CHECK: @v2 = external dllexport global i32 ; CHECK: @v3 = external global i32 +; CHECK: @v2 = external dllexport global i32 diff --git a/llvm/test/Linker/ctors.ll b/llvm/test/Linker/ctors.ll --- a/llvm/test/Linker/ctors.ll +++ b/llvm/test/Linker/ctors.ll @@ -6,12 +6,12 @@ ; Test the bitcode writer too. It used to crash. ; RUN: llvm-link %s %p/Inputs/ctors.ll -o %t.bc +; ALL: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* @v }] @v = weak global i8 0 ; CHECK1: @v = weak global i8 0 ; CHECK2: @v = weak global i8 1 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* @v }] -; ALL: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* @v }] define weak void @f() { ret void diff --git a/llvm/test/Linker/ctors2.ll b/llvm/test/Linker/ctors2.ll --- a/llvm/test/Linker/ctors2.ll +++ b/llvm/test/Linker/ctors2.ll @@ -3,5 +3,5 @@ $foo = comdat any @foo = global i8 0, comdat -; CHECK: @foo = global i8 0, comdat ; CHECK: @llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer +; CHECK: @foo = global i8 0, comdat diff --git a/llvm/test/Linker/ctors3.ll b/llvm/test/Linker/ctors3.ll --- a/llvm/test/Linker/ctors3.ll +++ b/llvm/test/Linker/ctors3.ll @@ -4,5 +4,5 @@ %t = type { i8 } @foo = global %t zeroinitializer, comdat -; CHECK: 
@foo = global %t zeroinitializer, comdat ; CHECK: @llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer +; CHECK: @foo = global %t zeroinitializer, comdat diff --git a/llvm/test/Linker/globalorder.ll b/llvm/test/Linker/globalorder.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Linker/globalorder.ll @@ -0,0 +1,27 @@ +; Test the order of global variables during llvm-link + +; RUN: llvm-link %s %S/Inputs/globalorder-2.ll -o %t.bc +; RUN: llvm-dis -o - %t.bc | FileCheck %s + +@var1 = internal global i32 0, align 4 +@var2 = internal global i32 0, align 4 +@var3 = global i32* @var1, align 4 +@var4 = global i32* @var2, align 4 + +define i32 @foo() { +entry: + %0 = load i32*, i32** @var3, align 4 + %1 = load i32, i32* %0, align 4 + %2 = load i32*, i32** @var4, align 4 + %3 = load i32, i32* %2, align 4 + %add = add nsw i32 %3, %1 + ret i32 %add +} +; CHECK: @var1 = +; CHECK-NEXT: @var2 = +; CHECK-NEXT: @var3 = +; CHECK-NEXT: @var4 = +; CHECK-NEXT: @var5 = +; CHECK-NEXT: @var6 = +; CHECK-NEXT: @var7 = +; CHECK-NEXT: @var8 = diff --git a/llvm/test/Linker/link-flags.ll b/llvm/test/Linker/link-flags.ll --- a/llvm/test/Linker/link-flags.ll +++ b/llvm/test/Linker/link-flags.ll @@ -9,8 +9,8 @@ CU-LABEL:@U = global i32 6 CI-LABEL:@U = internal global i32 6 CN-NOT:@U -DI-LABEL: @Y = global i8 42 DI-LABEL: @llvm.used = appending global [2 x i8*] [i8* @Y, i8* bitcast (i64 ()* @foo to i8*)], section "llvm.metadata" +DI-LABEL: @Y = global i8 42 B-LABEL: define void @bar() { diff --git a/llvm/test/Linker/metadata-attach.ll b/llvm/test/Linker/metadata-attach.ll --- a/llvm/test/Linker/metadata-attach.ll +++ b/llvm/test/Linker/metadata-attach.ll @@ -6,17 +6,17 @@ ; CHECK-LINKED1: @g1 = global i32 0, !attach !0{{$}} @g1 = global i32 0, !attach !0 -; CHECK: @g3 = weak global i32 1, !attach !0{{$}} ; CHECK: @g2 = external global i32, !attach !0{{$}} +; CHECK: @g3 = weak global i32 1, !attach !0{{$}} ; CHECK-LINKED1: @g2 = global i32 1, !attach !1{{$}} @g2 = 
external global i32, !attach !0 ; CHECK-LINKED1: @g3 = global i32 2, !attach !1{{$}} @g3 = weak global i32 1, !attach !0 -; CHECK-LINKED2: @g2 = global i32 1, !attach !0{{$}} -; CHECK-LINKED2: @g3 = global i32 2, !attach !0{{$}} -; CHECK-LINKED2: @g1 = global i32 0, !attach !1{{$}} +; CHECK-LINKED2: @g1 = global i32 0, !attach !0{{$}} +; CHECK-LINKED2: @g2 = global i32 1, !attach !1{{$}} +; CHECK-LINKED2: @g3 = global i32 2, !attach !1{{$}} ; CHECK: define void @f1() !attach !0 { ; CHECK-LINKED1: define void @f1() !attach !0 { @@ -36,14 +36,14 @@ ret void } -; CHECK-LINKED2: define void @f2() !attach !0 { -; CHECK-LINKED2: define void @f3() !attach !0 { -; CHECK-LINKED2: define void @f1() !attach !1 { +; CHECK-LINKED2: define void @f2() !attach !1 { +; CHECK-LINKED2: define void @f3() !attach !1 { +; CHECK-LINKED2: define void @f1() !attach !0 { ; CHECK-LINKED1: !0 = !{i32 0} ; CHECK-LINKED1: !1 = !{i32 1} -; CHECK-LINKED2: !0 = !{i32 1} -; CHECK-LINKED2: !1 = !{i32 0} +; CHECK-LINKED2: !0 = !{i32 0} +; CHECK-LINKED2: !1 = !{i32 1} !0 = !{i32 0} diff --git a/llvm/test/Linker/testlink.ll b/llvm/test/Linker/testlink.ll --- a/llvm/test/Linker/testlink.ll +++ b/llvm/test/Linker/testlink.ll @@ -1,7 +1,7 @@ ; RUN: llvm-link %s %S/Inputs/testlink.ll -S | FileCheck %s -; CHECK: %Ty2 = type { %Ty1* } ; CHECK: %Ty1 = type { %Ty2* } +; CHECK: %Ty2 = type { %Ty1* } %Ty1 = type opaque %Ty2 = type { %Ty1* } diff --git a/llvm/test/ThinLTO/X86/import-constant.ll b/llvm/test/ThinLTO/X86/import-constant.ll --- a/llvm/test/ThinLTO/X86/import-constant.ll +++ b/llvm/test/ThinLTO/X86/import-constant.ll @@ -28,9 +28,9 @@ ; PROMOTE: @_ZL3Obj.llvm.{{.*}} = hidden constant %struct.S { i32 4, i32 8, i32* @val } ; @outer is a write-only variable, so it's been converted to zeroinitializer. 
-; IMPORT: @outer = internal local_unnamed_addr global %struct.Q zeroinitializer +; IMPORT: @val = available_externally global i32 42 ; IMPORT-NEXT: @_ZL3Obj.llvm.{{.*}} = available_externally hidden constant %struct.S { i32 4, i32 8, i32* @val } -; IMPORT-NEXT: @val = available_externally global i32 42 +; IMPORT-NEXT: @outer = internal local_unnamed_addr global %struct.Q zeroinitializer ; OPT: @outer = internal unnamed_addr global %struct.Q zeroinitializer @@ -39,8 +39,8 @@ ; OPT-NEXT: store %struct.S* null, %struct.S** getelementptr inbounds (%struct.Q, %struct.Q* @outer, i64 0, i32 0) ; OPT-NEXT: ret i32 12 -; NOREFS: @outer = internal local_unnamed_addr global %struct.Q zeroinitializer -; NOREFS-NEXT: @_ZL3Obj.llvm.{{.*}} = external hidden constant %struct.S +; NOREFS: @_ZL3Obj.llvm.{{.*}} = external hidden constant %struct.S +; NOREFS-NEXT: @outer = internal local_unnamed_addr global %struct.Q zeroinitializer target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/ThinLTO/X86/index-const-prop2.ll b/llvm/test/ThinLTO/X86/index-const-prop2.ll --- a/llvm/test/ThinLTO/X86/index-const-prop2.ll +++ b/llvm/test/ThinLTO/X86/index-const-prop2.ll @@ -57,13 +57,13 @@ ; with corresponsing stores ; RUN: llvm-dis %t5.2.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN2-SRC -; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4 -; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4 +; IMPORT: @gBar = internal local_unnamed_addr global i32 2, align 4 +; IMPORT-NEXT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4 ; IMPORT: !DICompileUnit({{.*}}) ; Write only variables are imported with a zero initializer. 
-; IMPORT-WRITEONLY: @gFoo.llvm.0 = internal unnamed_addr global i32 0 ; IMPORT-WRITEONLY: @gBar = internal local_unnamed_addr global i32 0 +; IMPORT-WRITEONLY: @gFoo.llvm.0 = internal unnamed_addr global i32 0 ; CODEGEN: i32 @main() ; CODEGEN-NEXT: ret i32 3 diff --git a/llvm/test/ThinLTO/X86/writeonly2.ll b/llvm/test/ThinLTO/X86/writeonly2.ll --- a/llvm/test/ThinLTO/X86/writeonly2.ll +++ b/llvm/test/ThinLTO/X86/writeonly2.ll @@ -19,8 +19,8 @@ ; with corresponsing stores ; RUN: llvm-dis %t3.2.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN-SRC -; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 0, align 4 -; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 0, align 4 +; IMPORT: @gBar = internal local_unnamed_addr global i32 0, align 4 +; IMPORT-NEXT: @gFoo.llvm.0 = internal unnamed_addr global i32 0, align 4 ; IMPORT: !DICompileUnit({{.*}}) ; CODEGEN-NOT: gFoo diff --git a/llvm/tools/lto/lto.cpp b/llvm/tools/lto/lto.cpp --- a/llvm/tools/lto/lto.cpp +++ b/llvm/tools/lto/lto.cpp @@ -164,8 +164,13 @@ CG->setAttr(attrs); } - if (OptLevel < '0' || OptLevel > '3') - report_fatal_error("Optimization level must be between 0 and 3"); + if (OptLevel == 's') + OptLevel = '4'; + else if (OptLevel == 'z') + OptLevel = '5'; + if (OptLevel < '0' || OptLevel > '5') + report_fatal_error( + "Optimization level must be between 0 and 3 or Os or Oz"); CG->setOptLevel(OptLevel - '0'); CG->setFreestanding(EnableFreestanding); } @@ -519,8 +524,13 @@ CodeGen->setFreestanding(EnableFreestanding); if (OptLevel.getNumOccurrences()) { - if (OptLevel < '0' || OptLevel > '3') - report_fatal_error("Optimization level must be between 0 and 3"); + if (OptLevel == 's') + OptLevel = '4'; + else if (OptLevel == 'z') + OptLevel = '5'; + if (OptLevel < '0' || OptLevel > '5') + report_fatal_error( + "Optimization level must be between 0 and 3 or Os or Oz"); CodeGen->setOptLevel(OptLevel - '0'); switch (OptLevel) { case '0':