diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -540,6 +540,11 @@ bool selectXRayCustomEvent(const CallInst *II); bool selectXRayTypedEvent(const CallInst *II); + bool shouldOptForSize(const MachineFunction *MF) const { + // TODO: Implement PGSO. + return MF->getFunction().hasOptSize(); + } + private: /// Handle PHI nodes in successor blocks. /// diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -58,6 +58,7 @@ class AAResults; class BlockAddress; +class BlockFrequencyInfo; class Constant; class ConstantFP; class ConstantInt; @@ -71,6 +72,7 @@ class MachineConstantPoolValue; class MCSymbol; class OptimizationRemarkEmitter; +class ProfileSummaryInfo; class SDDbgValue; class SDDbgLabel; class SelectionDAG; @@ -235,6 +237,9 @@ /// whenever manipulating the DAG. OptimizationRemarkEmitter *ORE; + ProfileSummaryInfo *PSI = nullptr; + BlockFrequencyInfo *BFI = nullptr; + /// The starting token. SDNode EntryNode; @@ -401,7 +406,8 @@ /// Prepare this SelectionDAG to process code in the given MachineFunction. void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - LegacyDivergenceAnalysis * Divergence); + LegacyDivergenceAnalysis * Divergence, + ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin); void setFunctionLoweringInfo(FunctionLoweringInfo * FuncInfo) { FLI = FuncInfo; @@ -423,6 +429,8 @@ const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; } LLVMContext *getContext() const {return Context; } OptimizationRemarkEmitter &getORE() const { return *ORE; } + ProfileSummaryInfo *getPSI() const { return PSI; } + BlockFrequencyInfo *getBFI() const { return BFI; } /// Pop up a GraphViz/gv window with the DAG rendered using 'dot'. void viewGraph(const std::string &Title); @@ -1717,6 +1725,8 @@ return MF->getDenormalMode(EVTToAPFloatSemantics(VT)); } + bool shouldOptForSize() const; + private: void InsertNode(SDNode *N); bool RemoveNodeFromCSEMaps(SDNode *N); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -39,6 +39,8 @@ class GCFunctionInfo; class ScheduleDAGSDNodes; class LoadInst; +class ProfileSummaryInfo; +class BlockFrequencyInfo; /// SelectionDAGISel - This is the common base class used for SelectionDAG-based /// pattern-matching instruction selectors. @@ -249,6 +251,11 @@ virtual StringRef getIncludePathForIndex(unsigned index) { llvm_unreachable("Tblgen should generate the implementation of this!"); } + + bool shouldOptForSize(const MachineFunction *MF) const { + return CurDAG->shouldOptForSize(); + } + public: // Calls to these predicates are generated by tblgen. bool CheckAndMask(SDValue LHS, ConstantSDNode *RHS, diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -217,7 +217,7 @@ DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) { - ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + ForCodeSize = DAG.shouldOptForSize(); MaximumLegalStoreInBits = 0; // We use the minimum store size here, since that's all we can guarantee @@ -12885,7 +12885,7 @@ // Assume that libcalls are the smallest code. // TODO: This restriction should probably be lifted for vectors. - if (DAG.getMachineFunction().getFunction().hasOptSize()) + if (ForCodeSize) return SDValue(); // pow(X, 0.25) --> sqrt(sqrt(X)) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -63,6 +65,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include #include #include @@ -1005,7 +1008,9 @@ void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - LegacyDivergenceAnalysis * Divergence) { + LegacyDivergenceAnalysis * Divergence, + ProfileSummaryInfo *PSIin, + BlockFrequencyInfo *BFIin) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; @@ -1014,6 +1019,8 @@ LibInfo = LibraryInfo; Context = &MF->getFunction().getContext(); DA = Divergence; + PSI = PSIin; + BFI = BFIin; } SelectionDAG::~SelectionDAG() { @@ -1023,6 +1030,11 @@ delete DbgInfo; } +bool SelectionDAG::shouldOptForSize() const { + return MF->getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI); +} + void SelectionDAG::allnodes_clear() { assert(&*AllNodes.begin() == &EntryNode); AllNodes.remove(AllNodes.begin()); @@ -1427,7 +1439,7 @@ assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = MF->getFunction().hasOptSize() + Alignment = shouldOptForSize() ? getDataLayout().getABITypeAlignment(C->getType()) : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; @@ -5733,12 +5745,13 @@ SrcDelta + G->getOffset()); } -static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { +static bool shouldLowerMemFuncForSize(const MachineFunction &MF, + SelectionDAG &DAG) { // On Darwin, -Os means optimize for size without hurting performance, so // only really optimize for size when -Oz (MinSize) is used. if (MF.getTarget().getTargetTriple().isOSDarwin()) return MF.getFunction().hasMinSize(); - return MF.getFunction().hasOptSize(); + return DAG.shouldOptForSize(); } static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, @@ -5788,7 +5801,7 @@ bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -5971,7 +5984,7 @@ bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -6077,7 +6090,7 @@ bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -28,10 +28,12 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" @@ -5360,8 +5362,8 @@ if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); - const Function &F = DAG.getMachineFunction().getFunction(); - if (!F.hasOptSize() || + bool OptForSize = DAG.shouldOptForSize(); + if (!OptForSize || // If optimizing for size, don't insert too many multiplies. // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { @@ -10441,7 +10443,7 @@ return; } - SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr); + SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI()); SL->findBitTestClusters(Clusters, &SI); LLVM_DEBUG({ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -444,7 +444,8 @@ SplitCriticalSideEffectEdges(const_cast(Fn), DT, LI); CurDAG->init(*MF, *ORE, this, LibInfo, - getAnalysisIfAvailable()); + getAnalysisIfAvailable(), + nullptr, nullptr); FuncInfo->set(Fn, *MF, CurDAG); SwiftError->setFunction(*MF); diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -39,20 +39,16 @@ /// make the right decision when generating code for different targets. const AArch64Subtarget *Subtarget; - bool ForCodeSize; - public: explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), - ForCodeSize(false) {} + : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {} StringRef getPassName() const override { return "AArch64 Instruction Selection"; } bool runOnMachineFunction(MachineFunction &MF) override { - ForCodeSize = MF.getFunction().hasOptSize(); Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } @@ -399,7 +395,7 @@ bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { // Trivial if we are optimizing for code size or if there is only // one use of the value. - if (ForCodeSize || V.hasOneUse()) + if (CurDAG->shouldOptForSize() || V.hasOneUse()) return true; // If a subtarget has a fastpath LSL we can fold a logical shift into // the addressing mode and save a cycle. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -518,10 +518,10 @@ // the Function object through the Subtarget and objections were raised // to that (see post-commit review comments for r301750). let RecomputePerFunction = 1 in { - def ForCodeSize : Predicate<"MF->getFunction().hasOptSize()">; - def NotForCodeSize : Predicate<"!MF->getFunction().hasOptSize()">; + def ForCodeSize : Predicate<"shouldOptForSize(MF)">; + def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">; // Avoid generating STRQro if it is slow, unless we're optimizing for code size. - def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || MF->getFunction().hasOptSize()">; + def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">; def UseBTI : Predicate<[{ MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>; def NotUseBTI : Predicate<[{ !MF->getFunction().hasFnAttribute("branch-target-enforcement") }]>; diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -335,7 +335,7 @@ // Do not want to hoist if we're not optimizing for size. // TODO: We'd like to remove this restriction. // See the comment in X86InstrInfo.td for more info. - if (!OptForSize) + if (!CurDAG->shouldOptForSize()) return false; // Walk all the users of the immediate. @@ -3019,7 +3019,7 @@ LLVM_FALLTHROUGH; case X86ISD::ADD: // Try to match inc/dec. - if (!Subtarget->slowIncDec() || OptForSize) { + if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) { bool IsOne = isOneConstant(StoredVal.getOperand(1)); bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1)); // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25,7 +25,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -8397,7 +8399,7 @@ // TODO: If multiple splats are generated to load the same constant, // it may be detrimental to overall size. There needs to be a way to detect // that condition to know if this is truly a size win. - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); // Handle broadcasting a single constant scalar from the constant pool // into a vector. @@ -11174,7 +11176,7 @@ case MVT::v32i16: case MVT::v64i8: { // Attempt to lower to a bitmask if we can. Only if not optimizing for size. - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); if (!OptForSize) { if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) @@ -18315,7 +18317,7 @@ "Unexpected funnel shift type!"); // Expand slow SHLD/SHRD cases if we are not optimizing for size. - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); if (!OptForSize && Subtarget.isSHLDSlow()) return SDValue(); @@ -18547,7 +18549,7 @@ /// implementation, and likely shuffle complexity of the alternate sequence. static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - bool IsOptimizingSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool IsOptimizingSize = DAG.shouldOptForSize(); bool HasFastHOps = Subtarget.hasFastHorizontalOps(); return !IsSingleSource || IsOptimizingSize || HasFastHOps; } @@ -20473,7 +20475,7 @@ } else { // Use BT if the immediate can't be encoded in a TEST instruction or we // are optimizing for size and the immedaite won't fit in a byte. - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) && isPowerOf2_64(AndRHSVal)) { Src = AndLHS; @@ -39645,7 +39647,7 @@ return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) - bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + bool OptForSize = DAG.shouldOptForSize(); unsigned Bits = VT.getScalarSizeInBits(); // SHLD/SHRD instructions have lower register pressure, but on some diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -983,12 +983,12 @@ // the Function object through the Subtarget and objections were raised // to that (see post-commit review comments for r301750). let RecomputePerFunction = 1 in { - def OptForSize : Predicate<"MF->getFunction().hasOptSize()">; + def OptForSize : Predicate<"shouldOptForSize(MF)">; def OptForMinSize : Predicate<"MF->getFunction().hasMinSize()">; - def OptForSpeed : Predicate<"!MF->getFunction().hasOptSize()">; + def OptForSpeed : Predicate<"!shouldOptForSize(MF)">; def UseIncDec : Predicate<"!Subtarget->slowIncDec() || " - "MF->getFunction().hasOptSize()">; - def NoSSE41_Or_OptForSize : Predicate<"MF->getFunction().hasOptSize() || " + "shouldOptForSize(MF)">; + def NoSSE41_Or_OptForSize : Predicate<"shouldOptForSize(MF) || " "!Subtarget->hasSSE41()">; } diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -66,7 +66,7 @@ if (!DAG) report_fatal_error("DAG?"); OptimizationRemarkEmitter ORE(F); - DAG->init(*MF, ORE, nullptr, nullptr, nullptr); + DAG->init(*MF, ORE, nullptr, nullptr, nullptr, nullptr, nullptr); } LLVMContext Context; diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -5129,6 +5129,14 @@ SubtargetFeatureInfo::emitComputeAvailableFeatures( Target.getName(), "InstructionSelector", "computeAvailableModuleFeatures", ModuleFeatures, OS); + + if (Target.getName() == "X86" || Target.getName() == "AArch64") { + // TODO: Implement PGSO. + OS << "static bool shouldOptForSize(const MachineFunction *MF) {\n"; + OS << " return MF->getFunction().hasOptSize();\n"; + OS << "}\n\n"; + } + SubtargetFeatureInfo::emitComputeAvailableFeatures( Target.getName(), "InstructionSelector", "computeAvailableFunctionFeatures", FunctionFeatures, OS,