Skip to content

Commit 9009d29

Browse files
committed May 26, 2017
[ARM] Fix lowering of misaligned memcpy/memset
Currently getOptimalMemOpType returns i32 for large enough sizes without checking for alignment, leading to poor code generation when misaligned accesses aren't permitted as we generate a word store then later split it up into byte stores. This means we inadvertently go over the MaxStoresPerMemcpy limit, and for memset we splat the memset value into a word then immediately split it up again. Fix this by leaving it up to FindOptimalMemOpLowering to figure out which type to use, but also fix a bug there where it wasn't correctly checking if misaligned memory accesses are allowed. Differential Revision: https://reviews.llvm.org/D33442 llvm-svn: 303990
1 parent ba9d8ba commit 9009d29

File tree

4 files changed

+63
-22
lines changed

4 files changed

+63
-22
lines changed
 

‎llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

+12-12
Original file line numberDiff line numberDiff line change
@@ -4779,23 +4779,23 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
47794779
DAG.getMachineFunction());
47804780

47814781
if (VT == MVT::Other) {
4782-
if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) ||
4783-
TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) {
4784-
VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS);
4785-
} else {
4786-
switch (DstAlign & 7) {
4787-
case 0: VT = MVT::i64; break;
4788-
case 4: VT = MVT::i32; break;
4789-
case 2: VT = MVT::i16; break;
4790-
default: VT = MVT::i8; break;
4791-
}
4792-
}
4793-
4782+
// Use the largest integer type whose alignment constraints are satisfied.
4783+
// We only need to check DstAlign here as SrcAlign is always greater or
4784+
// equal to DstAlign (or zero).
4785+
VT = MVT::i64;
4786+
while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
4787+
!TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
4788+
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
4789+
assert(VT.isInteger());
4790+
4791+
// Find the largest legal integer type.
47944792
MVT LVT = MVT::i64;
47954793
while (!TLI.isTypeLegal(LVT))
47964794
LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
47974795
assert(LVT.isInteger());
47984796

4797+
// If the type we've chosen is larger than the largest legal integer type
4798+
// then use that instead.
47994799
if (VT.bitsGT(LVT))
48004800
VT = LVT;
48014801
}

‎llvm/lib/Target/ARM/ARMISelLowering.cpp

-6
Original file line numberDiff line numberDiff line change
@@ -12147,12 +12147,6 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
1214712147
}
1214812148
}
1214912149

12150-
// Lowering to i32/i16 if the size permits.
12151-
if (Size >= 4)
12152-
return MVT::i32;
12153-
else if (Size >= 2)
12154-
return MVT::i16;
12155-
1215612150
// Let the target-independent logic figure it out.
1215712151
return MVT::Other;
1215812152
}

‎llvm/test/CodeGen/ARM/memcpy-inline.ll

+1-4
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,7 @@ entry:
9595
; CHECK: movt [[REG7:r[0-9]+]], #22866
9696
; CHECK: str [[REG7]]
9797
; CHECK-T1-LABEL: t5:
98-
; CHECK-T1: movs [[TREG3:r[0-9]]],
99-
; CHECK-T1: strb [[TREG3]],
100-
; CHECK-T1: movs [[TREG4:r[0-9]]],
101-
; CHECK-T1: strb [[TREG4]],
98+
; CHECK-T1: bl _memcpy
10299
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
103100
ret void
104101
}

‎llvm/test/CodeGen/ARM/memset-inline.ll

+50
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,56 @@ entry:
3838
ret void
3939
}
4040

41+
define void @t3(i8* %p) {
42+
entry:
43+
; CHECK-7A-LABEL: t3:
44+
; CHECK-7A: muls [[REG:r[0-9]+]],
45+
; CHECK-7A: str [[REG]],
46+
; CHECK-6M-LABEL: t3:
47+
; CHECK-6M-NOT: muls
48+
; CHECK-6M: strb [[REG:r[0-9]+]],
49+
; CHECK-6M: strb [[REG]],
50+
; CHECK-6M: strb [[REG]],
51+
; CHECK-6M: strb [[REG]],
52+
br label %for.body
53+
54+
for.body:
55+
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
56+
%0 = trunc i32 %i to i8
57+
call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 1, i1 false)
58+
call void @something(i8* %p)
59+
%inc = add nuw nsw i32 %i, 1
60+
%exitcond = icmp eq i32 %inc, 255
61+
br i1 %exitcond, label %for.end, label %for.body
62+
63+
for.end:
64+
ret void
65+
}
66+
67+
define void @t4(i8* %p) {
68+
entry:
69+
; CHECK-7A-LABEL: t4:
70+
; CHECK-7A: muls [[REG:r[0-9]+]],
71+
; CHECK-7A: str [[REG]],
72+
; CHECK-6M-LABEL: t4:
73+
; CHECK-6M: muls [[REG:r[0-9]+]],
74+
; CHECK-6M: strh [[REG]],
75+
; CHECK-6M: strh [[REG]],
76+
br label %for.body
77+
78+
for.body:
79+
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
80+
%0 = trunc i32 %i to i8
81+
call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 2, i1 false)
82+
call void @something(i8* %p)
83+
%inc = add nuw nsw i32 %i, 1
84+
%exitcond = icmp eq i32 %inc, 255
85+
br i1 %exitcond, label %for.end, label %for.body
86+
87+
for.end:
88+
ret void
89+
}
90+
4191
declare void @something(i8*) nounwind
4292
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
4393
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind

0 commit comments

Comments
 (0)
Please sign in to comment.