Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4151,6 +4151,18 @@ return true; } +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + const Function *F = MF.getFunction(); + bool HasMinSize = F->hasFnAttribute(Attribute::MinSize); + bool HasOptSize = F->hasFnAttribute(Attribute::OptimizeForSize); + + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return HasMinSize; + return HasOptSize || HasMinSize; +} + static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, @@ -4171,7 +4183,7 @@ bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4284,7 +4296,7 @@ bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4378,7 +4390,7 @@ bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -964,11 +964,11 @@ //// temporary - rewrite interface to use type MaxStoresPerMemset = 8; - MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemsetOptSize = 4; MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores - MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + MaxStoresPerMemcpyOptSize = 2; MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores - MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + MaxStoresPerMemmoveOptSize = 2; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -1723,14 +1723,12 @@ computeRegisterProperties(Subtarget->getRegisterInfo()); - // On Darwin, -Os means optimize for size without hurting performance, - // do not reduce the limit. MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores - MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; + MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores - MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemcpyOptSize = 4; MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores - MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemmoveOptSize = 4; setPrefLoopAlignment(4); // 2^4 bytes. // Predictable cmov don't hurt on atom because it's in-order. Index: llvm/trunk/test/CodeGen/X86/memcpy.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/memcpy.ll +++ llvm/trunk/test/CodeGen/X86/memcpy.ll @@ -59,48 +59,16 @@ ; DARWIN: movq } -; FIXME: Both Linux and Darwin should lower to a memcpy call; minsize is on. define void @test3_minsize(i8* nocapture %A, i8* nocapture %B) nounwind minsize noredzone { tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false) ret void ; LINUX-LABEL: test3_minsize: -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq -; LINUX: movq +; LINUX: memcpy ; DARWIN-LABEL: test3_minsize: -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq +; DARWIN: memcpy } -; FIXME: Darwin should lower to a memcpy call; minsize is on. define void @test3_minsize_optsize(i8* nocapture %A, i8* nocapture %B) nounwind optsize minsize noredzone { tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false) ret void @@ -108,22 +76,7 @@ ; LINUX: memcpy ; DARWIN-LABEL: test3_minsize_optsize: -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq +; DARWIN: memcpy } ; Large constant memcpy's should be inlined when not optimizing for size.