Skip to content

Commit 1166f2f

Browse files
committed Jul 30, 2015
fix memcpy/memset/memmove lowering when optimizing for size
Fixing MinSize attribute handling was discussed in D11363. This is a prerequisite patch for doing that. The handling of OptSize when lowering mem* functions was broken on Darwin because it wants to ignore -Os for these cases, but the existing logic also made it ignore -Oz (MinSize). The Linux change demonstrates a widespread problem: the backend doesn't usually recognize the MinSize attribute by itself; it assumes that if the MinSize attribute exists, then the OptSize attribute must also exist. Fixing this more generally will be a follow-on patch or two. Differential Revision: http://reviews.llvm.org/D11568. llvm-svn: 243693.
1 parent 0deb694 commit 1166f2f

File tree

4 files changed

+24
-61
lines changed

4 files changed

+24
-61
lines changed
 

‎llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

+15-3
Original file line numberDiff line numberDiff line change
@@ -4151,6 +4151,18 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
41514151
return true;
41524152
}
41534153

4154+
/// Decide whether memcpy/memmove/memset lowering in \p MF should use the
/// size-optimized limits.
///
/// Reads the MinSize and OptimizeForSize attributes from the function that
/// \p MF wraps. On Darwin targets only MinSize counts; on every other target
/// either attribute is sufficient.
///
/// \param MF machine function whose IR function attributes and target triple
///           are inspected.
/// \returns true if the mem* lowering should optimize for size.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
4155+
const Function *F = MF.getFunction();
4156+
bool HasMinSize = F->hasFnAttribute(Attribute::MinSize);
4157+
bool HasOptSize = F->hasFnAttribute(Attribute::OptimizeForSize);
4158+
4159+
// On Darwin, -Os means optimize for size without hurting performance, so
4160+
// only really optimize for size when -Oz (MinSize) is used.
4161+
if (MF.getTarget().getTargetTriple().isOSDarwin())
4162+
return HasMinSize; // OptimizeForSize alone is deliberately ignored here.
4163+
return HasOptSize || HasMinSize; // Non-Darwin: either attribute is enough.
4164+
}
4165+
41544166
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
41554167
SDValue Chain, SDValue Dst,
41564168
SDValue Src, uint64_t Size,
@@ -4171,7 +4183,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
41714183
bool DstAlignCanChange = false;
41724184
MachineFunction &MF = DAG.getMachineFunction();
41734185
MachineFrameInfo *MFI = MF.getFrameInfo();
4174-
bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
4186+
bool OptSize = shouldLowerMemFuncForSize(MF);
41754187
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
41764188
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
41774189
DstAlignCanChange = true;
@@ -4284,7 +4296,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
42844296
bool DstAlignCanChange = false;
42854297
MachineFunction &MF = DAG.getMachineFunction();
42864298
MachineFrameInfo *MFI = MF.getFrameInfo();
4287-
bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
4299+
bool OptSize = shouldLowerMemFuncForSize(MF);
42884300
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
42894301
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
42904302
DstAlignCanChange = true;
@@ -4378,7 +4390,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
43784390
bool DstAlignCanChange = false;
43794391
MachineFunction &MF = DAG.getMachineFunction();
43804392
MachineFrameInfo *MFI = MF.getFrameInfo();
4381-
bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
4393+
bool OptSize = shouldLowerMemFuncForSize(MF);
43824394
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
43834395
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
43844396
DstAlignCanChange = true;

‎llvm/lib/Target/ARM/ARMISelLowering.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -964,11 +964,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
964964

965965
//// temporary - rewrite interface to use type
966966
MaxStoresPerMemset = 8;
967-
MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
967+
MaxStoresPerMemsetOptSize = 4;
968968
MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
969-
MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
969+
MaxStoresPerMemcpyOptSize = 2;
970970
MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
971-
MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
971+
MaxStoresPerMemmoveOptSize = 2;
972972

973973
// On ARM arguments smaller than 4 bytes are extended, so all arguments
974974
// are at least 4 bytes aligned.

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+3-5
Original file line numberDiff line numberDiff line change
@@ -1723,14 +1723,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
17231723

17241724
computeRegisterProperties(Subtarget->getRegisterInfo());
17251725

1726-
// On Darwin, -Os means optimize for size without hurting performance,
1727-
// do not reduce the limit.
17281726
MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1729-
MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
1727+
MaxStoresPerMemsetOptSize = 8;
17301728
MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1731-
MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
1729+
MaxStoresPerMemcpyOptSize = 4;
17321730
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1733-
MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
1731+
MaxStoresPerMemmoveOptSize = 4;
17341732
setPrefLoopAlignment(4); // 2^4 bytes.
17351733

17361734
// Predictable cmov don't hurt on atom because it's in-order.

‎llvm/test/CodeGen/X86/memcpy.ll

+3-50
Original file line numberDiff line numberDiff line change
@@ -59,71 +59,24 @@ entry:
5959
; DARWIN: movq
6060
}
6161

62-
; FIXME: Both Linux and Darwin should lower to a memcpy call; minsize is on.
6362
define void @test3_minsize(i8* nocapture %A, i8* nocapture %B) nounwind minsize noredzone {
6463
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
6564
ret void
6665
; LINUX-LABEL: test3_minsize:
67-
; LINUX: movq
68-
; LINUX: movq
69-
; LINUX: movq
70-
; LINUX: movq
71-
; LINUX: movq
72-
; LINUX: movq
73-
; LINUX: movq
74-
; LINUX: movq
75-
; LINUX: movq
76-
; LINUX: movq
77-
; LINUX: movq
78-
; LINUX: movq
79-
; LINUX: movq
80-
; LINUX: movq
81-
; LINUX: movq
82-
; LINUX: movq
66+
; LINUX: memcpy
8367

8468
; DARWIN-LABEL: test3_minsize:
85-
; DARWIN: movq
86-
; DARWIN: movq
87-
; DARWIN: movq
88-
; DARWIN: movq
89-
; DARWIN: movq
90-
; DARWIN: movq
91-
; DARWIN: movq
92-
; DARWIN: movq
93-
; DARWIN: movq
94-
; DARWIN: movq
95-
; DARWIN: movq
96-
; DARWIN: movq
97-
; DARWIN: movq
98-
; DARWIN: movq
99-
; DARWIN: movq
100-
; DARWIN: movq
69+
; DARWIN: memcpy
10170
}
10271

103-
; FIXME: Darwin should lower to a memcpy call; minsize is on.
10472
define void @test3_minsize_optsize(i8* nocapture %A, i8* nocapture %B) nounwind optsize minsize noredzone {
10573
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false)
10674
ret void
10775
; LINUX-LABEL: test3_minsize_optsize:
10876
; LINUX: memcpy
10977

11078
; DARWIN-LABEL: test3_minsize_optsize:
111-
; DARWIN: movq
112-
; DARWIN: movq
113-
; DARWIN: movq
114-
; DARWIN: movq
115-
; DARWIN: movq
116-
; DARWIN: movq
117-
; DARWIN: movq
118-
; DARWIN: movq
119-
; DARWIN: movq
120-
; DARWIN: movq
121-
; DARWIN: movq
122-
; DARWIN: movq
123-
; DARWIN: movq
124-
; DARWIN: movq
125-
; DARWIN: movq
126-
; DARWIN: movq
79+
; DARWIN: memcpy
12780
}
12881

12982
; Large constant memcpy's should be inlined when not optimizing for size.

0 commit comments

Comments
 (0)
Please sign in to comment.