Skip to content

Commit fb99a49

Browse files
author
Daniel Neilson
committed Feb 8, 2018
[LoopIdiom] Be more aggressive when setting alignment in memcpy
Summary: This change is part of step five in the series of changes to remove the alignment argument from memcpy/memmove/memset in favour of alignment attributes. In particular, this changes the LoopIdiom pass to cease using the old IRBuilder CreateMemCpy single-alignment APIs in favour of the new API that allows setting source and destination alignments independently. This allows us to be slightly more aggressive in setting the alignment of memcpy calls that loop idiom creates. Steps: Step 1) Remove alignment parameter and create alignment parameter attributes for memcpy/memmove/memset. ( rL322965, rC322964, rL322963 ) Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing source and dest alignments. ( rL323597 ) Step 3) Update Clang to use the new IRBuilder API. ( rC323617 ) Step 4) Update Polly to use the new IRBuilder API. ( rL323618 ) Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API, and those that use MemIntrinsicInst::[get|set]Alignment() to use [get|set]DestAlignment() and [get|set]SourceAlignment() instead. ( rL323886, rL323891, rL324148, rL324273, rL324278, rL324384, rL324395, rL324402 ) Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the MemIntrinsicInst::[get|set]Alignment() methods. Reference http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html llvm-svn: 324626
1 parent fa3e722 commit fb99a49

File tree

2 files changed

+80
-4
lines changed

2 files changed

+80
-4
lines changed
 

‎llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

+5-4
Original file line number | Diff line number | Diff line change
@@ -756,8 +756,8 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
756756
MSIs.insert(MSI);
757757
bool NegStride = SizeInBytes == -Stride;
758758
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
759-
MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
760-
BECount, NegStride, /*IsLoopMemset=*/true);
759+
MSI->getDestAlignment(), SplatValue, MSI, MSIs,
760+
Ev, BECount, NegStride, /*IsLoopMemset=*/true);
761761
}
762762

763763
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -1037,16 +1037,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
10371037
Value *NumBytes =
10381038
Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
10391039

1040-
unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
10411040
CallInst *NewCall = nullptr;
10421041
// Check whether to generate an unordered atomic memcpy:
10431042
// If the load or store are atomic, then they must necessarily be unordered
10441043
// by previous checks.
10451044
if (!SI->isAtomic() && !LI->isAtomic())
1046-
NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
1045+
NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
1046+
LoadBasePtr, LI->getAlignment(), NumBytes);
10471047
else {
10481048
// We cannot allow unaligned ops for unordered load/store, so reject
10491049
// anything where the alignment isn't at least the element size.
1050+
unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
10501051
if (Align < StoreSize)
10511052
return false;
10521053

‎llvm/test/Transforms/LoopIdiom/basic.ll

+75
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,29 @@ for.end: ; preds = %for.body, %entry
2828
; CHECK-NOT: store
2929
}
3030

31+
; Make sure memset is formed for larger than 1 byte stores, and that the
32+
; alignment of the store is preserved
33+
define void @test1_i16(i16* align 2 %Base, i64 %Size) nounwind ssp {
34+
bb.nph: ; preds = %entry
35+
br label %for.body
36+
37+
for.body: ; preds = %bb.nph, %for.body
38+
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
39+
%I.0.014 = getelementptr i16, i16* %Base, i64 %indvar
40+
store i16 0, i16* %I.0.014, align 2
41+
%indvar.next = add i64 %indvar, 1
42+
%exitcond = icmp eq i64 %indvar.next, %Size
43+
br i1 %exitcond, label %for.end, label %for.body
44+
45+
for.end: ; preds = %for.body, %entry
46+
ret void
47+
; CHECK-LABEL: @test1_i16(
48+
; CHECK: %[[BaseBC:.*]] = bitcast i16* %Base to i8*
49+
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 1
50+
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 2 %[[BaseBC]], i8 0, i64 %[[Sz]], i1 false)
51+
; CHECK-NOT: store
52+
}
53+
3154
; This is a loop that was rotated but where the blocks weren't merged. This
3255
; shouldn't perturb us.
3356
define void @test1a(i8* %Base, i64 %Size) nounwind ssp {
@@ -169,6 +192,58 @@ for.end: ; preds = %for.body, %entry
169192
; CHECK: ret void
170193
}
171194

195+
;; memcpy formation, check alignment
196+
define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
197+
bb.nph:
198+
br label %for.body
199+
200+
for.body: ; preds = %bb.nph, %for.body
201+
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
202+
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
203+
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
204+
%V = load i32, i32* %I.0.014, align 1
205+
store i32 %V, i32* %DestI, align 4
206+
%indvar.next = add i64 %indvar, 1
207+
%exitcond = icmp eq i64 %indvar.next, %Size
208+
br i1 %exitcond, label %for.end, label %for.body
209+
210+
for.end: ; preds = %for.body, %entry
211+
ret void
212+
; CHECK-LABEL: @test6_dest_align(
213+
; CHECK: %[[Dst:.*]] = bitcast i32* %Dest to i8*
214+
; CHECK: %[[Src:.*]] = bitcast i32* %Base to i8*
215+
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
216+
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %[[Dst]], i8* align 1 %[[Src]], i64 %[[Sz]], i1 false)
217+
; CHECK-NOT: store
218+
; CHECK: ret void
219+
}
220+
221+
;; memcpy formation, check alignment
222+
define void @test6_src_align(i32* noalias align 4 %Base, i32* noalias align 1 %Dest, i64 %Size) nounwind ssp {
223+
bb.nph:
224+
br label %for.body
225+
226+
for.body: ; preds = %bb.nph, %for.body
227+
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
228+
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
229+
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
230+
%V = load i32, i32* %I.0.014, align 4
231+
store i32 %V, i32* %DestI, align 1
232+
%indvar.next = add i64 %indvar, 1
233+
%exitcond = icmp eq i64 %indvar.next, %Size
234+
br i1 %exitcond, label %for.end, label %for.body
235+
236+
for.end: ; preds = %for.body, %entry
237+
ret void
238+
; CHECK-LABEL: @test6_src_align(
239+
; CHECK: %[[Dst]] = bitcast i32* %Dest to i8*
240+
; CHECK: %[[Src]] = bitcast i32* %Base to i8*
241+
; CHECK: %[[Sz:[0-9]+]] = shl i64 %Size, 2
242+
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %[[Dst]], i8* align 4 %[[Src]], i64 %[[Sz]], i1 false)
243+
; CHECK-NOT: store
244+
; CHECK: ret void
245+
}
246+
172247

173248
; This is a loop that was rotated but where the blocks weren't merged. This
174249
; shouldn't perturb us.

0 commit comments

Comments
 (0)
Please sign in to comment.