Index: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2265,7 +2265,7 @@
                                       unsigned NumBytes) {
   // This optimisation potentially adds lots of load and store
   // micro-operations, it's only really a great benefit to code-size.
-  if (!MF.getFunction().optForMinSize())
+  if (!Subtarget.optForMinSize())
     return false;
 
   // If only one register is pushed/popped, LLVM can use an LDR/STR
Index: llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
+++ llvm/trunk/lib/Target/ARM/ARMFastISel.cpp
@@ -497,7 +497,7 @@
   }
 
   unsigned ResultReg = 0;
-  if (Subtarget->useMovt(*FuncInfo.MF))
+  if (Subtarget->useMovt())
     ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
 
   if (ResultReg)
@@ -555,7 +555,7 @@
   bool IsPositionIndependent = isPositionIndependent();
   // Use movw+movt when possible, it avoids constant pool entries.
   // Non-darwin targets only support static movt relocations in FastISel.
-  if (Subtarget->useMovt(*FuncInfo.MF) &&
+  if (Subtarget->useMovt() &&
       (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
     unsigned Opc;
     unsigned char TF = 0;
Index: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -465,7 +465,7 @@
     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
     if (ARM_AM::isSOImmTwoPartVal(Val))           return 2; // two instrs
   }
-  if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+  if (Subtarget->useMovt()) return 2; // MOVW + MOVT
   return 3; // Literal pool load
 }
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h
@@ -567,11 +567,7 @@
       return HasStandaloneRem;
     }
 
-    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
-      if (DAG.getMachineFunction().getFunction().optForMinSize())
-        return false;
-      return true;
-    }
+    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
 
     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
     CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
@@ -2069,7 +2069,7 @@
     auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
     auto *BB = CLI.CS.getParent();
     bool PreferIndirect =
-        Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
+        Subtarget->isThumb() && Subtarget->optForMinSize() &&
         count_if(GV->users(), [&BB](const User *U) {
           return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
         }) > 2;
@@ -2141,7 +2141,7 @@
     CallOpc = ARMISD::CALL_NOLINK;
   else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
            // Emit regular call when code size is the priority
-           !MF.getFunction().optForMinSize())
+           !Subtarget->optForMinSize())
     // "mov lr, pc; b _foo" to avoid confusing the RSP
     CallOpc = ARMISD::CALL_NOLINK;
   else
@@ -3224,7 +3224,7 @@
   } else if (Subtarget->isRWPI() && !IsRO) {
     // SB-relative.
    SDValue RelAddr;
-    if (Subtarget->useMovt(DAG.getMachineFunction())) {
+    if (Subtarget->useMovt()) {
       ++NumMovwMovt;
       SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
       RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
@@ -3244,7 +3244,7 @@
 
   // If we have T2 ops, we can materialize the address directly via movt/movw
   // pair. This is always cheaper.
-  if (Subtarget->useMovt(DAG.getMachineFunction())) {
+  if (Subtarget->useMovt()) {
     ++NumMovwMovt;
     // FIXME: Once remat is capable of dealing with instructions with register
     // operands, expand this into two nodes.
@@ -3267,7 +3267,7 @@
   SDLoc dl(Op);
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
 
-  if (Subtarget->useMovt(DAG.getMachineFunction()))
+  if (Subtarget->useMovt())
     ++NumMovwMovt;
 
   // FIXME: Once remat is capable of dealing with instructions with register
@@ -3287,7 +3287,7 @@
 SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
                                                      SelectionDAG &DAG) const {
   assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
-  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+  assert(Subtarget->useMovt() &&
          "Windows on ARM expects to use movw/movt");
   assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
          "ROPI/RWPI not currently supported for Windows");
@@ -7808,8 +7808,7 @@
     return SDValue();
 
   const auto &ST = static_cast<const ARMSubtarget &>(DAG.getSubtarget());
-  const auto &MF = DAG.getMachineFunction();
-  const bool MinSize = MF.getFunction().optForMinSize();
+  const bool MinSize = ST.optForMinSize();
   const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
                                       : ST.hasDivideInARMMode();
@@ -8979,7 +8978,7 @@
 
   // Load an immediate to varEnd.
   unsigned varEnd = MRI.createVirtualRegister(TRC);
-  if (Subtarget->useMovt(*MF)) {
+  if (Subtarget->useMovt()) {
     unsigned Vtmp = varEnd;
     if ((LoopSize & 0xFFFF0000) != 0)
       Vtmp = MRI.createVirtualRegister(TRC);
@@ -14714,6 +14713,10 @@
   return Subtarget->hasV6T2Ops();
 }
 
+bool ARMTargetLowering::shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
+  return !Subtarget->optForMinSize();
+}
+
 Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                          AtomicOrdering Ord) const {
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Index: llvm/trunk/lib/Target/ARM/ARMInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.cpp
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.cpp
@@ -94,7 +94,7 @@
   const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
   const TargetMachine &TM = MF.getTarget();
 
-  if (!Subtarget.useMovt(MF)) {
+  if (!Subtarget.useMovt()) {
     if (TM.isPositionIndependent())
       expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
     else
Index: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
@@ -354,14 +354,14 @@
 // FIXME: Eventually this will be just "hasV6T2Ops".
 let RecomputePerFunction = 1 in {
-  def UseMovt          : Predicate<"Subtarget->useMovt(*MF)">;
-  def DontUseMovt      : Predicate<"!Subtarget->useMovt(*MF)">;
-  def UseMovtInPic     : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">;
-  def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">;
+  def UseMovt          : Predicate<"Subtarget->useMovt()">;
+  def DontUseMovt      : Predicate<"!Subtarget->useMovt()">;
+  def UseMovtInPic     : Predicate<"Subtarget->useMovt() && Subtarget->allowPositionIndependentMovt()">;
+  def DontUseMovtInPic : Predicate<"!Subtarget->useMovt() || !Subtarget->allowPositionIndependentMovt()">;
 
   def UseFPVMLx: Predicate<"((Subtarget->useFPVMLx() &&"
                            " TM.Options.AllowFPOpFusion != FPOpFusion::Fast) ||"
-                           "MF->getFunction().optForMinSize())">;
+                           "Subtarget->optForMinSize())">;
 }
 
 def UseMulOps : Predicate<"Subtarget->useMulOps()">;
@@ -718,15 +718,14 @@
 
 /// arm_i32imm - True for +V6T2, or when isSOImmTwoParVal()
 def arm_i32imm : PatLeaf<(imm), [{
-  if (Subtarget->useMovt(*MF))
+  if (Subtarget->useMovt())
     return true;
   return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
 }]> {
   // Ideally this would be an IntImmLeaf, but then we wouldn't have access to
   // the MachineFunction.
   let GISelPredicateCode = [{
-    const auto &MF = *MI.getParent()->getParent();
-    if (STI.useMovt(MF))
+    if (STI.useMovt())
       return true;
     const auto &MO = MI.getOperand(1);
Index: llvm/trunk/lib/Target/ARM/ARMInstructionSelector.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstructionSelector.cpp
+++ llvm/trunk/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -581,7 +581,7 @@
   auto &MBB = *MIB->getParent();
   auto &MF = *MBB.getParent();
 
-  bool UseMovt = STI.useMovt(MF);
+  bool UseMovt = STI.useMovt();
 
   unsigned Size = TM.getPointerSize(0);
   unsigned Alignment = 4;
Index: llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1286,7 +1286,7 @@
     // can still change to a writeback form as that will save us 2 bytes
    // of code size. It can create WAW hazards though, so only do it if
    // we're minimizing code size.
-    if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
+    if (!STI->optForMinSize() || !BaseKill)
      return false;
 
    bool HighRegsUsed = false;
Index: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td
+++ llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td
@@ -301,7 +301,7 @@
                         (decimate (rotl SPR, 1), 4),
                         (decimate (rotl SPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -313,7 +313,7 @@
                         (decimate (rotl HPR, 1), 4),
                         (decimate (rotl HPR, 1), 2))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
@@ -335,7 +335,7 @@
   let AltOrders = [(rotl DPR, 16),
                    (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs();
   }];
   let DiagnosticType = "DPR";
 }
Index: llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -170,7 +170,7 @@
 
   // Code size optimisation: do not inline memcpy if expansion results in
   // more instructions than the libary call.
-  if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
+  if (NumMEMCPYs > 1 && Subtarget.optForMinSize()) {
     return SDValue();
   }
Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.h
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h
@@ -445,6 +445,10 @@
   /// What alignment is preferred for loop bodies, in log2(bytes).
   unsigned PrefLoopAlignment = 0;
 
+  /// OptMinSize - True if we're optimising for minimum code size, equal to
+  /// the function attribute.
+  bool OptMinSize = false;
+
   /// IsLittle - The target is Little Endian
   bool IsLittle;
 
@@ -467,7 +471,8 @@
   /// of the specified triple.
   ///
   ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
-               const ARMBaseTargetMachine &TM, bool IsLittle);
+               const ARMBaseTargetMachine &TM, bool IsLittle,
+               bool MinSize = false);
 
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
@@ -709,6 +714,7 @@
   bool disablePostRAScheduler() const { return DisablePostRAScheduler; }
   bool useSoftFloat() const { return UseSoftFloat; }
   bool isThumb() const { return InThumbMode; }
+  bool optForMinSize() const { return OptMinSize; }
   bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
   bool isThumb2() const { return InThumbMode && HasThumb2; }
   bool hasThumb2() const { return HasThumb2; }
@@ -735,9 +741,9 @@
     isThumb1Only();
   }
 
-  bool useStride4VFPs(const MachineFunction &MF) const;
+  bool useStride4VFPs() const;
 
-  bool useMovt(const MachineFunction &MF) const;
+  bool useMovt() const;
 
   bool supportsTailCall() const { return SupportsTailCall; }
Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
@@ -92,10 +92,12 @@
 ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                            const std::string &FS,
-                           const ARMBaseTargetMachine &TM, bool IsLittle)
+                           const ARMBaseTargetMachine &TM, bool IsLittle,
+                           bool MinSize)
     : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
-      CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
-      TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
+      CPUString(CPU), OptMinSize(MinSize), IsLittle(IsLittle),
+      TargetTriple(TT), Options(TM.Options), TM(TM),
+      FrameLowering(initializeFrameLowering(CPU, FS)),
       // At this point initializeSubtargetDependencies has been called so
       // we can query directly.
       InstrInfo(isThumb1Only()
@@ -373,20 +375,20 @@
 
 bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
 
-bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+bool ARMSubtarget::useStride4VFPs() const {
   // For general targets, the prologue can grow when VFPs are allocated with
   // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
   // format which it's more important to get right.
   return isTargetWatchABI() ||
-         (useWideStrideVFP() && !MF.getFunction().optForMinSize());
+         (useWideStrideVFP() && !OptMinSize);
 }
 
-bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+bool ARMSubtarget::useMovt() const {
   // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
   // immediates as it is inherently position independent, and may be out of
   // range otherwise.
   return !NoMovt && hasV8MBaselineOps() &&
-         (isTargetWindows() || !MF.getFunction().optForMinSize() || genExecuteOnly());
+         (isTargetWindows() || !OptMinSize || genExecuteOnly());
 }
 
 bool ARMSubtarget::useFastISel() const {
Index: llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
@@ -263,13 +263,20 @@
   if (SoftFloat)
     FS += FS.empty() ? "+soft-float" : ",+soft-float";
 
-  auto &I = SubtargetMap[CPU + FS];
+  // Use the optminsize to identify the subtarget, but don't use it in the
+  // feature string.
+  std::string Key = CPU + FS;
+  if (F.optForMinSize())
+    Key += "+minsize";
+
+  auto &I = SubtargetMap[Key];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
-    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+    I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle,
+                                        F.optForMinSize());
 
    if (!I->isThumb() && !I->hasARMOps())
      F.getContext().emitError("Function '" + F.getName() + "' uses ARM "
Index: llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ llvm/trunk/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -453,7 +453,7 @@
     break;
   case ARM::t2LDR_POST:
   case ARM::t2STR_POST: {
-    if (!MBB.getParent()->getFunction().optForMinSize())
+    if (!MinimizeSize)
      return false;
 
    if (!MI->hasOneMemOperand() ||
@@ -1128,7 +1128,7 @@
 
   // Optimizing / minimizing size? Minimizing size implies optimizing for size.
   OptimizeSize = MF.getFunction().optForSize();
-  MinimizeSize = MF.getFunction().optForMinSize();
+  MinimizeSize = STI->optForMinSize();
 
   BlockInfo.clear();
   BlockInfo.resize(MF.getNumBlockIDs());
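
Note (not part of the patch): the ARMTargetMachine.cpp hunk carries the central idea of the change. The minsize function attribute is folded into the SubtargetMap key, so functions that differ only in minsize get distinct ARMSubtarget objects carrying OptMinSize, while the feature string itself is left untouched. A minimal standalone C++ sketch of that keying scheme is below; SubtargetStub, makeKey and getSubtarget are invented names for illustration and do not exist in LLVM.

// Sketch of the "+minsize"-keyed subtarget cache used by the patch.
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct SubtargetStub {
  std::string CPU, FS;
  bool OptMinSize; // mirrors ARMSubtarget::OptMinSize
};

static std::map<std::string, std::unique_ptr<SubtargetStub>> SubtargetMap;

static std::string makeKey(const std::string &CPU, const std::string &FS,
                           bool MinSize) {
  // Use minsize to identify the subtarget, but keep it out of the
  // feature string (same idea as the Key += "+minsize" hunk above).
  std::string Key = CPU + FS;
  if (MinSize)
    Key += "+minsize";
  return Key;
}

static SubtargetStub &getSubtarget(const std::string &CPU,
                                   const std::string &FS, bool MinSize) {
  auto &I = SubtargetMap[makeKey(CPU, FS, MinSize)];
  if (!I)
    I = std::make_unique<SubtargetStub>(SubtargetStub{CPU, FS, MinSize});
  return *I;
}

int main() {
  auto &A = getSubtarget("cortex-m3", "+soft-float", /*MinSize=*/false);
  auto &B = getSubtarget("cortex-m3", "+soft-float", /*MinSize=*/true);
  // Same CPU and features, different minsize -> two distinct subtargets.
  std::cout << (&A != &B) << '\n'; // prints 1
  return 0;
}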