Index: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2265,7 +2265,7 @@
                                       unsigned NumBytes) {
   // This optimisation potentially adds lots of load and store
   // micro-operations, it's only really a great benefit to code-size.
-  if (!MF.getFunction().optForMinSize())
+  if (!Subtarget.optForMinSize())
     return false;
 
   // If only one register is pushed/popped, LLVM can use an LDR/STR
Index: llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
+++ llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
@@ -497,7 +497,7 @@
   }
 
   unsigned ResultReg = 0;
-  if (Subtarget->useMovt(*FuncInfo.MF))
+  if (Subtarget->useMovt())
     ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 
   if (ResultReg)
@@ -555,7 +555,7 @@
   bool IsPositionIndependent = isPositionIndependent();
   // Use movw+movt when possible, it avoids constant pool entries.
   // Non-darwin targets only support static movt relocations in FastISel.
-  if (Subtarget->useMovt(*FuncInfo.MF) &&
+  if (Subtarget->useMovt() &&
       (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
     unsigned Opc;
     unsigned char TF = 0;
Index: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -465,7 +465,7 @@
     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
     if (ARM_AM::isSOImmTwoPartVal(Val))           return 2; // two instrs
   }
-  if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
   return 3; // Literal pool load
 }
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h
@@ -567,11 +567,7 @@
       return HasStandaloneRem;
     }
 
-    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
-      if (DAG.getMachineFunction().getFunction().optForMinSize())
-        return false;
-      return true;
-    }
+    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
 
     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
     CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
@@ -2069,7 +2069,7 @@
     auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
     auto *BB = CLI.CS.getParent();
     bool PreferIndirect =
-        Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
+        Subtarget->isThumb() && Subtarget->optForMinSize() &&
         count_if(GV->users(), [&BB](const User *U) {
           return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
         }) > 2;
@@ -2141,7 +2141,7 @@
     CallOpc = ARMISD::CALL_NOLINK;
   else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
            // Emit regular call when code size is the priority
-           !MF.getFunction().optForMinSize())
+           !Subtarget->optForMinSize())
     // "mov lr, pc; b _foo" to avoid confusing the RSP
     CallOpc = ARMISD::CALL_NOLINK;
   else
@@ -3224,7 +3224,7 @@
   } else if (Subtarget->isRWPI() && !IsRO) {
     // SB-relative.
    SDValue RelAddr;
-    if (Subtarget->useMovt(DAG.getMachineFunction())) {
+    if (Subtarget->useMovt()) {
       ++NumMovwMovt;
       SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
       RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
@@ -3244,7 +3244,7 @@
 
   // If we have T2 ops, we can materialize the address directly via movt/movw
   // pair. This is always cheaper.
-  if (Subtarget->useMovt(DAG.getMachineFunction())) {
+  if (Subtarget->useMovt()) {
     ++NumMovwMovt;
     // FIXME: Once remat is capable of dealing with instructions with register
     // operands, expand this into two nodes.
@@ -3267,7 +3267,7 @@
   SDLoc dl(Op);
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
 
-  if (Subtarget->useMovt(DAG.getMachineFunction()))
+  if (Subtarget->useMovt())
     ++NumMovwMovt;
 
   // FIXME: Once remat is capable of dealing with instructions with register
@@ -3287,7 +3287,7 @@
 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                      SelectionDAG &DAG) const {
   assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
-  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+  assert(Subtarget->useMovt() &&
          "Windows on ARM expects to use movw/movt");
   assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
          "ROPI/RWPI not currently supported for Windows");
@@ -7808,8 +7808,7 @@
     return SDValue();
 
   const auto &ST = static_cast<const ARMSubtarget &>(DAG.getSubtarget());
-  const auto &MF = DAG.getMachineFunction();
-  const bool MinSize = MF.getFunction().optForMinSize();
+  const bool MinSize = ST.optForMinSize();
   const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
                                       : ST.hasDivideInARMMode();
@@ -8979,7 +8978,7 @@
 
   // Load an immediate to varEnd.
   unsigned varEnd = MRI.createVirtualRegister(TRC);
-  if (Subtarget->useMovt(*MF)) {
+  if (Subtarget->useMovt()) {
     unsigned Vtmp = varEnd;
     if ((LoopSize & 0xFFFF0000) != 0)
       Vtmp = MRI.createVirtualRegister(TRC);
@@ -14714,6 +14713,10 @@
   return Subtarget->hasV6T2Ops();
 }
 
+bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
+  return !Subtarget->optForMinSize();
+}
+
 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                          AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Index: llvm/trunk/lib/Target/ARM/ARMInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.cpp
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.cpp
@@ -94,7 +94,7 @@
   const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
   const TargetMachine &TM = MF.getTarget();
 
-  if (!Subtarget.useMovt(MF)) {
+  if (!Subtarget.useMovt()) {
     if (TM.isPositionIndependent())
       expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
     else
Index: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
@@ -354,14 +354,14 @@
 // FIXME: Eventually this will be just "hasV6T2Ops".
 let RecomputePerFunction = 1 in {
-  def UseMovt          : Predicate<"Subtarget->useMovt(*MF)">;
-  def DontUseMovt      : Predicate<"!Subtarget->useMovt(*MF)">;
-  def UseMovtInPic     : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
-  def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+  def UseMovt          : Predicate<"Subtarget->useMovt()">;
+  def DontUseMovt      : Predicate<"!Subtarget->useMovt()">;
+  def UseMovtInPic     : Predicate<"Subtarget->useMovt() && Subtarget->allowPositionIndependentMovt()">;
+  def DontUseMovtInPic : Predicate<"!Subtarget->useMovt() || !Subtarget->allowPositionIndependentMovt()">;
 
   def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
                            " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
-                           "MF->getFunction().optForMinSize())">;
+                           "Subtarget->optForMinSize())">;
 }
 
 def UseMulOps : Predicate<"Subtarget->useMulOps()">;
@@ -718,15 +718,14 @@
 
 /// arm_i32imm - True for +V6T2, or when isSOImmTwoParVal()
 def arm_i32imm : PatLeaf<(imm), [{
-  if (Subtarget->useMovt(*MF))
+  if (Subtarget->useMovt())
     return true;
   return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
 }]> {
   // Ideally this would be an IntImmLeaf, but then we wouldn't have access to
   // the MachineFunction.
   let GISelPredicateCode = [{
-    const auto &MF = *MI.getParent()->getParent();
-    if (STI.useMovt(MF))
+    if (STI.useMovt())
       return true;
     const auto &MO = MI.getOperand(1);
Index: llvm/trunk/lib/Target/ARM/ARMInstructionSelector.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstructionSelector.cpp
+++ llvm/trunk/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -581,7 +581,7 @@
   auto &MBB = *MIB->getParent();
   auto &MF = *MBB.getParent();
 
-  bool UseMovt = STI.useMovt(MF);
+  bool UseMovt = STI.useMovt();
 
   unsigned Size = TM.getPointerSize(0);
   unsigned Alignment = 4;
Index: llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1286,7 +1286,7 @@
     // can still change to a writeback form as that will save us 2 bytes
    // of code size. It can create WAW hazards though, so only do it if
    // we're minimizing code size.
-    if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
+    if (!STI->optForMinSize() || !BaseKill)
      return false;
 
    bool HighRegsUsed = false;
Index: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td
+++ llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td
@@ -301,7 +301,7 @@
                         (decimate (rotl SPR, 1), 4),
                         (decimate (rotl SPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -313,7 +313,7 @@
                         (decimate (rotl HPR, 1), 4),
                         (decimate (rotl HPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -335,7 +335,7 @@
   let AltOrders = [(rotl DPR, 16),
                    (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticType = "DPR";
 }
Index: llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -170,7 +170,7 @@
 
   // Code size optimisation: do not inline memcpy if expansion results in
   // more instructions than the libary call.
-  if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
+  if (NumMEMCPYs > 1 && Subtarget.optForMinSize()) {
     return SDValue();
   }
Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.h
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h
@@ -445,6 +445,10 @@
   /// What alignment is preferred for loop bodies, in log2(bytes).
   unsigned PrefLoopAlignment = 0;
 
+  /// OptMinSize - True if we're optimising for minimum code size, equal to
+  /// the function attribute.
+  bool OptMinSize = false;
+
   /// IsLittle - The target is Little Endian
   bool IsLittle;
 
@@ -467,7 +471,8 @@
   /// of the specified triple.
   ///
   ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
-               const ARMBaseTargetMachine &TM, bool IsLittle);
+               const ARMBaseTargetMachine &TM, bool IsLittle,
+               bool MinSize = false);
 
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
@@ -709,6 +714,7 @@
   bool disablePostRAScheduler() const { return DisablePostRAScheduler; }
   bool useSoftFloat() const { return UseSoftFloat; }
   bool isThumb() const { return InThumbMode; }
+  bool optForMinSize() const { return OptMinSize; }
   bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
   bool isThumb2() const { return InThumbMode && HasThumb2; }
   bool hasThumb2() const { return HasThumb2; }
@@ -735,9 +741,9 @@
     isThumb1Only();
   }
 
-  bool useStride4VFPs(const MachineFunction &MF) const;
+  bool useStride4VFPs() const;
 
-  bool useMovt(const MachineFunction &MF) const;
+  bool useMovt() const;
 
   bool supportsTailCall() const { return SupportsTailCall; }
Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
@@ -92,10 +92,12 @@
 ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                            const std::string &FS,
-                           const ARMBaseTargetMachine &TM, bool IsLittle)
+                           const ARMBaseTargetMachine &TM, bool IsLittle,
+                           bool MinSize)
     : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
-      CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
-      TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
+      CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
+      TargetTriple(TT), Options(TM.Options), TM(TM),
+      FrameLowering(initializeFrameLowering(CPU, FS)),
       // At this point initializeSubtargetDependencies has been called so
       // we can query directly.
       InstrInfo(isThumb1Only()
@@ -373,20 +375,20 @@
 
 bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
 
-bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+bool ARMSubtarget::useStride4VFPs() const {
   // For general targets, the prologue can grow when VFPs are allocated with
   // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
   // format which it's more important to get right.
   return isTargetWatchABI() ||
-         (useWideStrideVFP() && !MF.getFunction().optForMinSize());
+         (useWideStrideVFP() && !OptMinSize);
 }
 
-bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+bool ARMSubtarget::useMovt() const {
   // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
   // immediates as it is inherently position independent, and may be out of
   // range otherwise.
   return !NoMovt && hasV8MBaselineOps() &&
-         (isTargetWindows() || !MF.getFunction().optForMinSize() || genExecuteOnly());
+         (isTargetWindows() || !OptMinSize || genExecuteOnly());
 }
 
 bool ARMSubtarget::useFastISel() const {
Index: llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
@@ -263,13 +263,20 @@
   if (SoftFloat)
     FS += FS.empty() ? "+soft-float" : ",+soft-float";
 
-  auto &I = SubtargetMap[CPU + FS];
+  // Use the optminsize to identify the subtarget, but don't use it in the
+  // feature string.
+  std::string Key = CPU + FS;
+  if (F.optForMinSize())
+    Key += "+minsize";
+
+  auto &I = SubtargetMap[Key];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
-    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
+                                        F.optForMinSize());
 
    if (!I->isThumb() && !I->hasARMOps())
      F.getContext().emitError("Function '" + F.getName() + "' uses ARM "
Index: llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -453,7 +453,7 @@
     break;
   case ARM::t2LDR_POST:
   case ARM::t2STR_POST: {
-    if (!MBB.getParent()->getFunction().optForMinSize())
+    if (!MinimizeSize)
      return false;
 
    if (!MI->hasOneMemOperand() ||
@@ -1128,7 +1128,7 @@
 
   // Optimizing / minimizing size? Minimizing size implies optimizing for size.
   OptimizeSize = MF.getFunction().optForSize();
-  MinimizeSize = MF.getFunction().optForMinSize();
+  MinimizeSize = STI->optForMinSize();
 
   BlockInfo.clear();
   BlockInfo.resize(MF.getNumBlockIDs());
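
Note (not part of the patch): the ARMTargetMachine.cpp hunk carries the central idea of the change. The minsize function attribute is folded into the SubtargetMap key, so functions that differ only in minsize get distinct ARMSubtarget objects carrying OptMinSize, while the feature string itself is left untouched. A minimal standalone C++ sketch of that keying scheme is below; SubtargetStub, makeKey and getSubtarget are invented names for illustration and do not exist in LLVM.

// Sketch of the "+minsize"-keyed subtarget cache used by the patch.
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct SubtargetStub {
  std::string CPU, FS;
  bool OptMinSize; // mirrors ARMSubtarget::OptMinSize
};

static std::map<std::string, std::unique_ptr<SubtargetStub>> SubtargetMap;

static std::string makeKey(const std::string &CPU, const std::string &FS,
                           bool MinSize) {
  // Use minsize to identify the subtarget, but keep it out of the
  // feature string (same idea as the Key += "+minsize" hunk above).
  std::string Key = CPU + FS;
  if (MinSize)
    Key += "+minsize";
  return Key;
}

static SubtargetStub &getSubtarget(const std::string &CPU,
                                   const std::string &FS, bool MinSize) {
  auto &I = SubtargetMap[makeKey(CPU, FS, MinSize)];
  if (!I)
    I = std::make_unique<SubtargetStub>(SubtargetStub{CPU, FS, MinSize});
  return *I;
}

int main() {
  auto &A = getSubtarget("cortex-m3", "+soft-float", /*MinSize=*/false);
  auto &B = getSubtarget("cortex-m3", "+soft-float", /*MinSize=*/true);
  // Same CPU and features, different minsize -> two distinct subtargets.
  std::cout << (&A != &B) << '\n'; // prints 1
  return 0;
}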