Index: include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- include/llvm/CodeGen/SelectionDAG.h
+++ include/llvm/CodeGen/SelectionDAG.h
@@ -183,6 +183,8 @@
   MachineFunction *MF;
   LLVMContext *Context;
   CodeGenOpt::Level OptLevel;
+  bool OptForSize; // Corresponds to -Os or -Oz (-Oz implies -Os).
+  bool MinSize;    // Corresponds to -Oz alone.

   /// The starting token.
   SDNode EntryNode;
@@ -351,6 +353,14 @@
     checkForCycles(this);
     return Root;
   }
+
+  bool getOptForSize() const {
+    return OptForSize;
+  }
+
+  bool getMinSize() const {
+    return MinSize;
+  }

   /// This iterates over the nodes in the SelectionDAG, folding
   /// certain types of nodes together, or eliminating superfluous nodes.  The
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -90,7 +90,6 @@
     CodeGenOpt::Level OptLevel;
     bool LegalOperations;
     bool LegalTypes;
-    bool ForCodeSize;

    /// \brief Worklist of all of the nodes that need to be simplified.
    ///
@@ -428,9 +427,6 @@
    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
-      auto *F = DAG.getMachineFunction().getFunction();
-      ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
-                    F->hasFnAttribute(Attribute::MinSize);
    }

    /// Runs the dag combiner on all nodes in the work list
@@ -10190,7 +10186,7 @@
   }

   // Abort slicing if it does not seem to be profitable.
-  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
+  if (!isSlicingProfitable(LoadedSlices, UsedBits, DAG.getOptForSize()))
     return false;

   ++SlicedLoads;
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -937,7 +937,11 @@
   MF = &mf;
   TLI = getSubtarget().getTargetLowering();
   TSI = getSubtarget().getSelectionDAGInfo();
-  Context = &mf.getFunction()->getContext();
+  const Function *F = mf.getFunction();
+  Context = &F->getContext();
+  OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
+               F->hasFnAttribute(Attribute::MinSize);
+  MinSize = F->hasFnAttribute(Attribute::MinSize);
 }

 SelectionDAG::~SelectionDAG() {
@@ -4151,16 +4155,12 @@
   return true;
 }

-static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
-  const Function *F = MF.getFunction();
-  bool HasMinSize = F->hasFnAttribute(Attribute::MinSize);
-  bool HasOptSize = F->hasFnAttribute(Attribute::OptimizeForSize);
-
+static bool shouldLowerMemFuncForSize(const SelectionDAG &DAG) {
   // On Darwin, -Os means optimize for size without hurting performance, so
   // only really optimize for size when -Oz (MinSize) is used.
-  if (MF.getTarget().getTargetTriple().isOSDarwin())
-    return HasMinSize;
-  return HasOptSize || HasMinSize;
+  if (DAG.getMachineFunction().getTarget().getTargetTriple().isOSDarwin())
+    return DAG.getMinSize();
+  return DAG.getOptForSize();
 }

 static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
@@ -4183,7 +4183,7 @@
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = shouldLowerMemFuncForSize(MF);
+  bool OptSize = shouldLowerMemFuncForSize(DAG);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
@@ -4296,7 +4296,7 @@
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = shouldLowerMemFuncForSize(MF);
+  bool OptSize = shouldLowerMemFuncForSize(DAG);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
@@ -4390,7 +4390,7 @@
   bool DstAlignCanChange = false;
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  bool OptSize = shouldLowerMemFuncForSize(MF);
+  bool OptSize = shouldLowerMemFuncForSize(DAG);
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3943,8 +3943,7 @@
   if (Val == 0)
     return DAG.getConstantFP(1.0, DL, LHS.getValueType());

-  const Function *F = DAG.getMachineFunction().getFunction();
-  if (!F->hasFnAttribute(Attribute::OptimizeForSize) ||
+  if (!DAG.getOptForSize() ||
       // If optimizing for size, don't insert too many multiplies. This
       // inserts up to 5 multiplies.
       countPopulation(Val) + Log2_32(Val) < 7) {
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8422,9 +8422,7 @@
     return SDValue();

   // Don't split at Oz.
-  MachineFunction &MF = DAG.getMachineFunction();
-  bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
-  if (IsMinSize)
+  if (DAG.getMinSize())
     return SDValue();

   SDValue StVal = S->getValue();
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -1827,7 +1827,6 @@

   // FIXME: handle tail calls differently.
   unsigned CallOpc;
-  bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
   if (Subtarget->isThumb()) {
     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
       CallOpc = ARMISD::CALL_NOLINK;
@@ -1838,7 +1837,7 @@
       CallOpc = ARMISD::CALL_NOLINK;
     else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
              // Emit regular call when code size is the priority
-             !HasMinSizeAttr)
+             !DAG.getMinSize())
       // "mov lr, pc; b _foo" to avoid confusing the RSP
       CallOpc = ARMISD::CALL_NOLINK;
     else
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -462,7 +462,7 @@

 void X86DAGToDAGISel::PreprocessISelDAG() {
   // OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + OptForSize = CurDAG->getOptForSize(); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -5188,8 +5188,6 @@ // TODO: If multiple splats are generated to load the same constant, // it may be detrimental to overall size. There needs to be a way to detect // that condition to know if this is truly a size win. - const Function *F = DAG.getMachineFunction().getFunction(); - bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize); // Handle broadcasting a single constant scalar from the constant pool // into a vector. @@ -5197,7 +5195,7 @@ // from the constant pool and not to broadcast it from a scalar. // But override that restriction when optimizing for size. // TODO: Check if splatting is recommended for other AVX-capable CPUs. - if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) { + if (ConstSplatVal && (Subtarget->hasAVX2() || DAG.getOptForSize())) { EVT CVT = Ld.getValueType(); assert(!CVT.isVector() && "Must not broadcast a vector type"); @@ -5206,7 +5204,7 @@ // with AVX2, also splat i8 and i16. // With pattern matching, the VBROADCAST node may become a VMOVDDUP. if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || - (OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) { + (DAG.getOptForSize() && (ScalarSize == 64 || Subtarget->hasAVX2()))) { const Constant *C = nullptr; if (ConstantSDNode *CI = dyn_cast(Ld)) C = CI->getConstantIntValue(); @@ -11118,9 +11116,7 @@ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may // combine either bitwise AND or insert of float 0.0 to set these bits. - const Function *F = DAG.getMachineFunction().getFunction(); - bool MinSize = F->hasFnAttribute(Attribute::MinSize); - if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) { + if (IdxVal == 0 && (!DAG.getMinSize() || !MayFoldLoad(N1))) { // If this is an insertion of 32-bits into the low 32-bits of // a vector, we prefer to generate a blend with immediate rather // than an insertps. Blends are simpler operations in hardware and so @@ -13195,9 +13191,7 @@ // if we're optimizing for size, however, as that'll allow better folding // of memory operations. if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 && - !DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::MinSize) && - !Subtarget->isAtom()) { + !DAG.getMinSize() && !Subtarget->isAtom()) { unsigned ExtendOp = isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0); @@ -23961,16 +23955,13 @@ return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) - MachineFunction &MF = DAG.getMachineFunction(); - bool OptForSize = - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); // SHLD/SHRD instructions have lower register pressure, but on some // platforms they have higher latency than the equivalent // series of shifts/or that would otherwise be generated. // Don't fold (or (x << c) | (y >> (64 - c))) if SHLD/SHRD instructions // have higher latencies and we are not optimizing for size. 
-  if (!OptForSize && Subtarget->isSHLDSlow())
+  if (!DAG.getOptForSize() && Subtarget->isSHLDSlow())
    return SDValue();

  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
Index: test/CodeGen/X86/splat-for-size.ll
===================================================================
--- test/CodeGen/X86/splat-for-size.ll
+++ test/CodeGen/X86/splat-for-size.ll
@@ -14,7 +14,7 @@
 ; CHECK-NEXT: retq
 }

-define <4 x double> @splat_v4f64(<4 x double> %x) #0 {
+define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
   %add = fadd <4 x double> %x,
   ret <4 x double> %add
 ; CHECK-LABEL: splat_v4f64
@@ -32,7 +32,7 @@
 ; CHECK-NEXT: retq
 }

-define <8 x float> @splat_v8f32(<8 x float> %x) #0 {
+define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
   %add = fadd <8 x float> %x,
   ret <8 x float> %add
 ; CHECK-LABEL: splat_v8f32
@@ -54,7 +54,7 @@

 ; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
 ; and then we fake it: use vmovddup to splat 64-bit value.
-define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
+define <4 x i64> @splat_v4i64(<4 x i64> %x) #1 {
   %add = add <4 x i64> %x,
   ret <4 x i64> %add
 ; CHECK-LABEL: splat_v4i64
@@ -78,7 +78,7 @@
 }

 ; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
-define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
+define <8 x i32> @splat_v8i32(<8 x i32> %x) #1 {
   %add = add <8 x i32> %x,
   ret <8 x i32> %add
 ; CHECK-LABEL: splat_v8i32
@@ -102,7 +102,7 @@
 }

 ; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
-define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
+define <16 x i16> @splat_v16i16(<16 x i16> %x) #1 {
   %add = add <16 x i16> %x,
   ret <16 x i16> %add
 ; CHECK-LABEL: splat_v16i16
@@ -126,7 +126,7 @@
 }

 ; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
-define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
+define <32 x i8> @splat_v32i8(<32 x i8> %x) #1 {
   %add = add <32 x i8> %x,
   ret <32 x i8> %add
 ; CHECK-LABEL: splat_v32i8
@@ -153,3 +153,5 @@
 }

 attributes #0 = { optsize }
+; The minsize attribute (corresponds to -Oz) always implies optimizing for size too.
+attributes #1 = { minsize }
Index: test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
===================================================================
--- test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
+++ test/CodeGen/X86/x86-64-double-shifts-Oz-Os-O2.ll
@@ -9,7 +9,7 @@
 ;    return (a << 10) | (b >> 54);
 ; }

-; Function Attrs: minsize nounwind optsize readnone uwtable
+; Function Attrs: minsize nounwind readnone uwtable
 define i64 @_Z8lshift10mm(i64 %a, i64 %b) #0 {
 entry:
 ; CHECK: shldq $10
@@ -19,7 +19,7 @@
   ret i64 %or
 }

-attributes #0 = { minsize nounwind optsize readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { minsize nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }

 ; clang -Os -c test2.cpp -emit-llvm -S
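
Note (not part of the patch to apply): the sketch below illustrates the call-site pattern this change enables, namely querying the flags cached by SelectionDAG::init() instead of re-reading IR attributes through MachineFunction::getFunction() at every use. The helper name shouldPreferSmallCode and its boolean parameter are hypothetical; only getOptForSize() and getMinSize() come from the patch.

// Illustrative sketch only, assuming this patch is applied. The helper and
// its argument are hypothetical, not LLVM API.
#include "llvm/CodeGen/SelectionDAG.h"

// A target could factor its size heuristics like this: -Oz always prefers
// the smaller sequence; -Os prefers it unless the subtarget says the small
// sequence is much slower (mirrors the SHLD/SHRD and Darwin memcpy logic).
static bool shouldPreferSmallCode(const llvm::SelectionDAG &DAG,
                                  bool SmallSequenceIsMuchSlower) {
  if (DAG.getMinSize())
    return true;                      // -Oz: size wins unconditionally.
  return DAG.getOptForSize() &&       // -Os (also implied by -Oz) ...
         !SmallSequenceIsMuchSlower;  // ... unless speed cost is too high.
}

A call such as shouldPreferSmallCode(DAG, Subtarget->isSHLDSlow()) would then stand in for the attribute lookups removed above in X86ISelLowering.cpp.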